1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 33 #include "llvm/IR/Constants.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/GlobalValue.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/Support/AtomicOrdering.h" 38 #include "llvm/Support/Format.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <cassert> 41 #include <numeric> 42 43 using namespace clang; 44 using namespace CodeGen; 45 using namespace llvm::omp; 46 47 namespace { 48 /// Base class for handling code generation inside OpenMP regions. 49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 50 public: 51 /// Kinds of OpenMP regions used in codegen. 
52 enum CGOpenMPRegionKind { 53 /// Region with outlined function for standalone 'parallel' 54 /// directive. 55 ParallelOutlinedRegion, 56 /// Region with outlined function for standalone 'task' directive. 57 TaskOutlinedRegion, 58 /// Region for constructs that do not require function outlining, 59 /// like 'for', 'sections', 'atomic' etc. directives. 60 InlinedRegion, 61 /// Region with outlined function for standalone 'target' directive. 62 TargetRegion, 63 }; 64 65 CGOpenMPRegionInfo(const CapturedStmt &CS, 66 const CGOpenMPRegionKind RegionKind, 67 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 68 bool HasCancel) 69 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 70 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 71 72 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 73 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 74 bool HasCancel) 75 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 76 Kind(Kind), HasCancel(HasCancel) {} 77 78 /// Get a variable or parameter for storing global thread id 79 /// inside OpenMP construct. 80 virtual const VarDecl *getThreadIDVariable() const = 0; 81 82 /// Emit the captured statement body. 83 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 84 85 /// Get an LValue for the current ThreadID variable. 86 /// \return LValue for thread id variable. This LValue always has type int32*. 
87 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 88 89 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 90 91 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 92 93 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 94 95 bool hasCancel() const { return HasCancel; } 96 97 static bool classof(const CGCapturedStmtInfo *Info) { 98 return Info->getKind() == CR_OpenMP; 99 } 100 101 ~CGOpenMPRegionInfo() override = default; 102 103 protected: 104 CGOpenMPRegionKind RegionKind; 105 RegionCodeGenTy CodeGen; 106 OpenMPDirectiveKind Kind; 107 bool HasCancel; 108 }; 109 110 /// API for captured statement code generation in OpenMP constructs. 111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 112 public: 113 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 114 const RegionCodeGenTy &CodeGen, 115 OpenMPDirectiveKind Kind, bool HasCancel, 116 StringRef HelperName) 117 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 118 HasCancel), 119 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 120 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 121 } 122 123 /// Get a variable or parameter for storing global thread id 124 /// inside OpenMP construct. 125 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 126 127 /// Get the name of the capture helper. 128 StringRef getHelperName() const override { return HelperName; } 129 130 static bool classof(const CGCapturedStmtInfo *Info) { 131 return CGOpenMPRegionInfo::classof(Info) && 132 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 133 ParallelOutlinedRegion; 134 } 135 136 private: 137 /// A variable or parameter storing global thread id for OpenMP 138 /// constructs. 139 const VarDecl *ThreadIDVar; 140 StringRef HelperName; 141 }; 142 143 /// API for captured statement code generation in OpenMP constructs. 
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 145 public: 146 class UntiedTaskActionTy final : public PrePostActionTy { 147 bool Untied; 148 const VarDecl *PartIDVar; 149 const RegionCodeGenTy UntiedCodeGen; 150 llvm::SwitchInst *UntiedSwitch = nullptr; 151 152 public: 153 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 154 const RegionCodeGenTy &UntiedCodeGen) 155 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 156 void Enter(CodeGenFunction &CGF) override { 157 if (Untied) { 158 // Emit task switching point. 159 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 160 CGF.GetAddrOfLocalVar(PartIDVar), 161 PartIDVar->getType()->castAs<PointerType>()); 162 llvm::Value *Res = 163 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 164 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 165 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 166 CGF.EmitBlock(DoneBB); 167 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 168 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 169 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 170 CGF.Builder.GetInsertBlock()); 171 emitUntiedSwitch(CGF); 172 } 173 } 174 void emitUntiedSwitch(CodeGenFunction &CGF) const { 175 if (Untied) { 176 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 177 CGF.GetAddrOfLocalVar(PartIDVar), 178 PartIDVar->getType()->castAs<PointerType>()); 179 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 180 PartIdLVal); 181 UntiedCodeGen(CGF); 182 CodeGenFunction::JumpDest CurPoint = 183 CGF.getJumpDestInCurrentScope(".untied.next."); 184 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 185 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 186 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 187 CGF.Builder.GetInsertBlock()); 188 CGF.EmitBranchThroughCleanup(CurPoint); 189 CGF.EmitBlock(CurPoint.getBlock()); 190 } 191 } 192 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 193 }; 194 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 195 const VarDecl *ThreadIDVar, 196 const RegionCodeGenTy &CodeGen, 197 OpenMPDirectiveKind Kind, bool HasCancel, 198 const UntiedTaskActionTy &Action) 199 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 200 ThreadIDVar(ThreadIDVar), Action(Action) { 201 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 202 } 203 204 /// Get a variable or parameter for storing global thread id 205 /// inside OpenMP construct. 206 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 207 208 /// Get an LValue for the current ThreadID variable. 209 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 210 211 /// Get the name of the capture helper. 212 StringRef getHelperName() const override { return ".omp_outlined."; } 213 214 void emitUntiedSwitch(CodeGenFunction &CGF) override { 215 Action.emitUntiedSwitch(CGF); 216 } 217 218 static bool classof(const CGCapturedStmtInfo *Info) { 219 return CGOpenMPRegionInfo::classof(Info) && 220 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 221 TaskOutlinedRegion; 222 } 223 224 private: 225 /// A variable or parameter storing global thread id for OpenMP 226 /// constructs. 227 const VarDecl *ThreadIDVar; 228 /// Action for emitting code for untied tasks. 229 const UntiedTaskActionTy &Action; 230 }; 231 232 /// API for inlined captured statement code generation in OpenMP 233 /// constructs. 234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 235 public: 236 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 237 const RegionCodeGenTy &CodeGen, 238 OpenMPDirectiveKind Kind, bool HasCancel) 239 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 240 OldCSI(OldCSI), 241 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 242 243 // Retrieve the value of the context parameter. 
244 llvm::Value *getContextValue() const override { 245 if (OuterRegionInfo) 246 return OuterRegionInfo->getContextValue(); 247 llvm_unreachable("No context value for inlined OpenMP region"); 248 } 249 250 void setContextValue(llvm::Value *V) override { 251 if (OuterRegionInfo) { 252 OuterRegionInfo->setContextValue(V); 253 return; 254 } 255 llvm_unreachable("No context value for inlined OpenMP region"); 256 } 257 258 /// Lookup the captured field decl for a variable. 259 const FieldDecl *lookup(const VarDecl *VD) const override { 260 if (OuterRegionInfo) 261 return OuterRegionInfo->lookup(VD); 262 // If there is no outer outlined region,no need to lookup in a list of 263 // captured variables, we can use the original one. 264 return nullptr; 265 } 266 267 FieldDecl *getThisFieldDecl() const override { 268 if (OuterRegionInfo) 269 return OuterRegionInfo->getThisFieldDecl(); 270 return nullptr; 271 } 272 273 /// Get a variable or parameter for storing global thread id 274 /// inside OpenMP construct. 275 const VarDecl *getThreadIDVariable() const override { 276 if (OuterRegionInfo) 277 return OuterRegionInfo->getThreadIDVariable(); 278 return nullptr; 279 } 280 281 /// Get an LValue for the current ThreadID variable. 282 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 285 llvm_unreachable("No LValue for inlined OpenMP construct"); 286 } 287 288 /// Get the name of the capture helper. 
289 StringRef getHelperName() const override { 290 if (auto *OuterRegionInfo = getOldCSI()) 291 return OuterRegionInfo->getHelperName(); 292 llvm_unreachable("No helper name for inlined OpenMP construct"); 293 } 294 295 void emitUntiedSwitch(CodeGenFunction &CGF) override { 296 if (OuterRegionInfo) 297 OuterRegionInfo->emitUntiedSwitch(CGF); 298 } 299 300 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 301 302 static bool classof(const CGCapturedStmtInfo *Info) { 303 return CGOpenMPRegionInfo::classof(Info) && 304 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 305 } 306 307 ~CGOpenMPInlinedRegionInfo() override = default; 308 309 private: 310 /// CodeGen info about outer OpenMP region. 311 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 312 CGOpenMPRegionInfo *OuterRegionInfo; 313 }; 314 315 /// API for captured statement code generation in OpenMP target 316 /// constructs. For this captures, implicit parameters are used instead of the 317 /// captured fields. The name of the target region has to be unique in a given 318 /// application so it is provided by the client, because only the client has 319 /// the information to generate that. 320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 321 public: 322 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 323 const RegionCodeGenTy &CodeGen, StringRef HelperName) 324 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 325 /*HasCancel=*/false), 326 HelperName(HelperName) {} 327 328 /// This is unused for target regions because each starts executing 329 /// with a single thread. 330 const VarDecl *getThreadIDVariable() const override { return nullptr; } 331 332 /// Get the name of the capture helper. 
333 StringRef getHelperName() const override { return HelperName; } 334 335 static bool classof(const CGCapturedStmtInfo *Info) { 336 return CGOpenMPRegionInfo::classof(Info) && 337 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 338 } 339 340 private: 341 StringRef HelperName; 342 }; 343 344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 345 llvm_unreachable("No codegen for expressions"); 346 } 347 /// API for generation of expressions captured in a innermost OpenMP 348 /// region. 349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 350 public: 351 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 352 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 353 OMPD_unknown, 354 /*HasCancel=*/false), 355 PrivScope(CGF) { 356 // Make sure the globals captured in the provided statement are local by 357 // using the privatization logic. We assume the same variable is not 358 // captured more than once. 359 for (const auto &C : CS.captures()) { 360 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 361 continue; 362 363 const VarDecl *VD = C.getCapturedVar(); 364 if (VD->isLocalVarDeclOrParm()) 365 continue; 366 367 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 368 /*RefersToEnclosingVariableOrCapture=*/false, 369 VD->getType().getNonReferenceType(), VK_LValue, 370 C.getLocation()); 371 PrivScope.addPrivate( 372 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 373 } 374 (void)PrivScope.Privatize(); 375 } 376 377 /// Lookup the captured field decl for a variable. 378 const FieldDecl *lookup(const VarDecl *VD) const override { 379 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 380 return FD; 381 return nullptr; 382 } 383 384 /// Emit the captured statement body. 
385 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 386 llvm_unreachable("No body for expressions"); 387 } 388 389 /// Get a variable or parameter for storing global thread id 390 /// inside OpenMP construct. 391 const VarDecl *getThreadIDVariable() const override { 392 llvm_unreachable("No thread id for expressions"); 393 } 394 395 /// Get the name of the capture helper. 396 StringRef getHelperName() const override { 397 llvm_unreachable("No helper name for expressions"); 398 } 399 400 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 401 402 private: 403 /// Private scope to capture global variables. 404 CodeGenFunction::OMPPrivateScope PrivScope; 405 }; 406 407 /// RAII for emitting code of OpenMP constructs. 408 class InlinedOpenMPRegionRAII { 409 CodeGenFunction &CGF; 410 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 411 FieldDecl *LambdaThisCaptureField = nullptr; 412 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 413 414 public: 415 /// Constructs region for combined constructs. 416 /// \param CodeGen Code generation sequence for combined directives. Includes 417 /// a list of functions used for code generation of implicitly inlined 418 /// regions. 419 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 420 OpenMPDirectiveKind Kind, bool HasCancel) 421 : CGF(CGF) { 422 // Start emission for the construct. 423 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 424 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 427 CGF.LambdaThisCaptureField = nullptr; 428 BlockInfo = CGF.BlockInfo; 429 CGF.BlockInfo = nullptr; 430 } 431 432 ~InlinedOpenMPRegionRAII() { 433 // Restore original CapturedStmtInfo only if we're done with code emission. 
434 auto *OldCSI = 435 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 436 delete CGF.CapturedStmtInfo; 437 CGF.CapturedStmtInfo = OldCSI; 438 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 439 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 440 CGF.BlockInfo = BlockInfo; 441 } 442 }; 443 444 /// Values for bit flags used in the ident_t to describe the fields. 445 /// All enumeric elements are named and described in accordance with the code 446 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 447 enum OpenMPLocationFlags : unsigned { 448 /// Use trampoline for internal microtask. 449 OMP_IDENT_IMD = 0x01, 450 /// Use c-style ident structure. 451 OMP_IDENT_KMPC = 0x02, 452 /// Atomic reduction option for kmpc_reduce. 453 OMP_ATOMIC_REDUCE = 0x10, 454 /// Explicit 'barrier' directive. 455 OMP_IDENT_BARRIER_EXPL = 0x20, 456 /// Implicit barrier in code. 457 OMP_IDENT_BARRIER_IMPL = 0x40, 458 /// Implicit barrier in 'for' directive. 459 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 460 /// Implicit barrier in 'sections' directive. 461 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 462 /// Implicit barrier in 'single' directive. 463 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 464 /// Call of __kmp_for_static_init for static loop. 465 OMP_IDENT_WORK_LOOP = 0x200, 466 /// Call of __kmp_for_static_init for sections. 467 OMP_IDENT_WORK_SECTIONS = 0x400, 468 /// Call of __kmp_for_static_init for distribute. 469 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 470 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 471 }; 472 473 namespace { 474 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 475 /// Values for bit flags for marking which requires clauses have been used. 476 enum OpenMPOffloadingRequiresDirFlags : int64_t { 477 /// flag undefined. 478 OMP_REQ_UNDEFINED = 0x000, 479 /// no requires clause present. 480 OMP_REQ_NONE = 0x001, 481 /// reverse_offload clause. 
482 OMP_REQ_REVERSE_OFFLOAD = 0x002, 483 /// unified_address clause. 484 OMP_REQ_UNIFIED_ADDRESS = 0x004, 485 /// unified_shared_memory clause. 486 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 487 /// dynamic_allocators clause. 488 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 489 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 490 }; 491 492 enum OpenMPOffloadingReservedDeviceIDs { 493 /// Device ID if the device was not defined, runtime should get it 494 /// from environment variables in the spec. 495 OMP_DEVICEID_UNDEF = -1, 496 }; 497 } // anonymous namespace 498 499 /// Describes ident structure that describes a source location. 500 /// All descriptions are taken from 501 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 502 /// Original structure: 503 /// typedef struct ident { 504 /// kmp_int32 reserved_1; /**< might be used in Fortran; 505 /// see above */ 506 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 507 /// KMP_IDENT_KMPC identifies this union 508 /// member */ 509 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 510 /// see above */ 511 ///#if USE_ITT_BUILD 512 /// /* but currently used for storing 513 /// region-specific ITT */ 514 /// /* contextual information. */ 515 ///#endif /* USE_ITT_BUILD */ 516 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 517 /// C++ */ 518 /// char const *psource; /**< String describing the source location. 519 /// The string is composed of semi-colon separated 520 // fields which describe the source file, 521 /// the function and a pair of line numbers that 522 /// delimit the construct. 523 /// */ 524 /// } ident_t; 525 enum IdentFieldIndex { 526 /// might be used in Fortran 527 IdentField_Reserved_1, 528 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
529 IdentField_Flags, 530 /// Not really used in Fortran any more 531 IdentField_Reserved_2, 532 /// Source[4] in Fortran, do not use for C++ 533 IdentField_Reserved_3, 534 /// String describing the source location. The string is composed of 535 /// semi-colon separated fields which describe the source file, the function 536 /// and a pair of line numbers that delimit the construct. 537 IdentField_PSource 538 }; 539 540 /// Schedule types for 'omp for' loops (these enumerators are taken from 541 /// the enum sched_type in kmp.h). 542 enum OpenMPSchedType { 543 /// Lower bound for default (unordered) versions. 544 OMP_sch_lower = 32, 545 OMP_sch_static_chunked = 33, 546 OMP_sch_static = 34, 547 OMP_sch_dynamic_chunked = 35, 548 OMP_sch_guided_chunked = 36, 549 OMP_sch_runtime = 37, 550 OMP_sch_auto = 38, 551 /// static with chunk adjustment (e.g., simd) 552 OMP_sch_static_balanced_chunked = 45, 553 /// Lower bound for 'ordered' versions. 554 OMP_ord_lower = 64, 555 OMP_ord_static_chunked = 65, 556 OMP_ord_static = 66, 557 OMP_ord_dynamic_chunked = 67, 558 OMP_ord_guided_chunked = 68, 559 OMP_ord_runtime = 69, 560 OMP_ord_auto = 70, 561 OMP_sch_default = OMP_sch_static, 562 /// dist_schedule types 563 OMP_dist_sch_static_chunked = 91, 564 OMP_dist_sch_static = 92, 565 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 566 /// Set if the monotonic schedule modifier was present. 567 OMP_sch_modifier_monotonic = (1 << 29), 568 /// Set if the nonmonotonic schedule modifier was present. 569 OMP_sch_modifier_nonmonotonic = (1 << 30), 570 }; 571 572 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 573 /// region. 
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() hook is run when this cleanup is emitted.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Calls Action->Exit(), unless there is no current insertion point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Runs the stored code generation callback inside its own cleanups scope.
/// When a pre/post action is attached, a CleanupTy for it is pushed first and
/// the action is handed to the callback; otherwise a default-constructed
/// PrePostActionTy is passed.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the given reduction operation is a call to a user-defined
/// reduction (UDR) and, if so, return the UDR decl used for the reduction.
/// The expected shape is a CallExpr whose callee is an OpaqueValueExpr whose
/// source expression (ignoring implicit casts) is a DeclRefExpr naming an
/// OMPDeclareReductionDecl.
/// \return The referenced declaration, or nullptr if \p ReductionOp does not
/// have that shape.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit initialization of a single private reduction item for a user-defined
/// reduction.
/// \param DRD User-defined reduction declaration.
/// \param InitOp Call expression used when \p DRD has an initializer; its two
/// arguments are expected to be unary operators applied to DeclRefExprs.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original item.
/// \param Ty Type of the item; used only when \p DRD has no initializer.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Map the variables referenced by the two call arguments onto the private
    // and the original addresses while the call is emitted.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the second function of the pair returned for
    // this UDR and emit the call for its side effects only.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: initialize the private copy from a private
    // constant global holding the null constant of type Ty.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Route the loaded value through an opaque value expression so it can be
    // stored with the generic expression-to-memory emitter.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is emitted into every
/// element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, may be null. When non-null,
/// the source array is walked in lock-step with the destination array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes hold the current source (only with a DRD) and destination
  // element pointers; the entry edge supplies the begin pointers.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run before moving
    // on to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the value name below says "dest" although this is the
    // source pointer; it is left unchanged because it appears in emitted IR.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue of the shared expression \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit the upper-bound lvalue of \p E when it is an array section (the
/// section is emitted with IsLowerBound=false); for any other expression a
/// default-constructed LValue is returned.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Emit element-wise initialization of the private copy of the N-th reduction
/// item. The UDR path is taken when \p DRD is non-null and either has an
/// initializer or the private variable has no initializer of its own;
/// otherwise the private variable's initializer is used for every element.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

/// Build the per-item clause data from parallel lists of expressions.
/// One entry is created per element of \p Shareds; \p Origs, \p Privates and
/// \p ReductionOps are read in lock-step and so must be at least as long.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit and record the (lower bound, upper bound) lvalue pairs for the shared
/// and the original expression of the N-th item. Must be called for items in
/// order: exactly N pairs must have been recorded already.
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    // Same expression: reuse the lvalues emitted above.
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

/// Compute and record the size of the N-th reduction item and, if its private
/// type is variably modified, emit that type with the computed element count
/// bound to the size expression of the variable array type.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Statically sized item: only the size in chars is recorded.
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count is (upper bound pointer - lower bound pointer) + 1.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Element count is the size in chars divided by the element size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the variably modified private type of the N-th item using an
/// externally supplied element count \p Size. For items whose private type is
/// not variably modified, \p Size and the recorded element count must both be
/// null and nothing is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of the N-th reduction item.
/// \param PrivateAddr Address of the private copy; it is cast to the memory
/// type of the private variable.
/// \param SharedLVal LValue of the shared item; it is rebuilt with the type
/// and base info recorded for the N-th shared address.
/// \param DefaultInit Callback performing default initialization. Its result
/// is ignored on the array and UDR paths; on the remaining path the private
/// variable's own non-trivial initializer is emitted only if the callback
/// returns false.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Non-array item initialized through the user-defined reduction.
    (void)DefaultInit(CGF);
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \return true if the private type of the N-th item has a destruction kind
/// other than DK_none.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push destruction of the private copy of the N-th item at \p PrivateAddr,
/// if needCleanups(N) reports that one is required.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Starting from \p BaseLV of type \p BaseTy, load through pointers and
/// references until the type is no longer a pointer/reference or matches
/// \p ElTy, then return an lvalue for the resulting address cast to the memory
/// type of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return
Address(Addr, BaseLVAlignment); 972 } 973 974 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 975 const VarDecl *OrigVD = nullptr; 976 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 977 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 978 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 979 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 980 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 981 Base = TempASE->getBase()->IgnoreParenImpCasts(); 982 DE = cast<DeclRefExpr>(Base); 983 OrigVD = cast<VarDecl>(DE->getDecl()); 984 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 985 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 986 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 987 Base = TempASE->getBase()->IgnoreParenImpCasts(); 988 DE = cast<DeclRefExpr>(Base); 989 OrigVD = cast<VarDecl>(DE->getDecl()); 990 } 991 return OrigVD; 992 } 993 994 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 995 Address PrivateAddr) { 996 const DeclRefExpr *DE; 997 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 998 BaseDecls.emplace_back(OrigVD); 999 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1000 LValue BaseLValue = 1001 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1002 OriginalBaseLValue); 1003 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1004 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1005 llvm::Value *PrivatePointer = 1006 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1007 PrivateAddr.getPointer(), 1008 SharedAddresses[N].first.getAddress(CGF).getType()); 1009 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1010 return castToBase(CGF, OrigVD->getType(), 1011 SharedAddresses[N].first.getType(), 1012 OriginalBaseLValue.getAddress(CGF).getType(), 1013 OriginalBaseLValue.getAlignment(), Ptr); 1014 } 1015 
BaseDecls.emplace_back( 1016 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1017 return PrivateAddr; 1018 } 1019 1020 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1021 const OMPDeclareReductionDecl *DRD = 1022 getReductionInit(ClausesData[N].ReductionOp); 1023 return DRD && DRD->getInitializer(); 1024 } 1025 1026 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1027 return CGF.EmitLoadOfPointerLValue( 1028 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1029 getThreadIDVariable()->getType()->castAs<PointerType>()); 1030 } 1031 1032 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1033 if (!CGF.HaveInsertPoint()) 1034 return; 1035 // 1.2.2 OpenMP Language Terminology 1036 // Structured block - An executable statement with a single entry at the 1037 // top and a single exit at the bottom. 1038 // The point of exit cannot be a branch out of the structured block. 1039 // longjmp() and throw() must not violate the entry/exit criteria. 
1040 CGF.EHStack.pushTerminate(); 1041 CodeGen(CGF); 1042 CGF.EHStack.popTerminate(); 1043 } 1044 1045 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1046 CodeGenFunction &CGF) { 1047 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1048 getThreadIDVariable()->getType(), 1049 AlignmentSource::Decl); 1050 } 1051 1052 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1053 QualType FieldTy) { 1054 auto *Field = FieldDecl::Create( 1055 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1056 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1057 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1058 Field->setAccess(AS_public); 1059 DC->addDecl(Field); 1060 return Field; 1061 } 1062 1063 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1064 StringRef Separator) 1065 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1066 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { 1067 ASTContext &C = CGM.getContext(); 1068 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1069 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1070 RD->startDefinition(); 1071 // reserved_1 1072 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1073 // flags 1074 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1075 // reserved_2 1076 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1077 // reserved_3 1078 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1079 // psource 1080 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1081 RD->completeDefinition(); 1082 IdentQTy = C.getRecordType(RD); 1083 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1084 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1085 1086 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def 1087 OMPBuilder.initialize(); 1088 loadOffloadInfoMetadata(); 1089 } 1090 1091 void CGOpenMPRuntime::clear() { 1092 InternalVars.clear(); 1093 // Clean non-target 
variable declarations possibly used only in debug info. 1094 for (const auto &Data : EmittedNonTargetVariables) { 1095 if (!Data.getValue().pointsToAliveValue()) 1096 continue; 1097 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1098 if (!GV) 1099 continue; 1100 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1101 continue; 1102 GV->eraseFromParent(); 1103 } 1104 } 1105 1106 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1107 SmallString<128> Buffer; 1108 llvm::raw_svector_ostream OS(Buffer); 1109 StringRef Sep = FirstSeparator; 1110 for (StringRef Part : Parts) { 1111 OS << Sep << Part; 1112 Sep = Separator; 1113 } 1114 return std::string(OS.str()); 1115 } 1116 1117 static llvm::Function * 1118 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1119 const Expr *CombinerInitializer, const VarDecl *In, 1120 const VarDecl *Out, bool IsCombiner) { 1121 // void .omp_combiner.(Ty *in, Ty *out); 1122 ASTContext &C = CGM.getContext(); 1123 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1124 FunctionArgList Args; 1125 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1126 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1127 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1128 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1129 Args.push_back(&OmpOutParm); 1130 Args.push_back(&OmpInParm); 1131 const CGFunctionInfo &FnInfo = 1132 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1133 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1134 std::string Name = CGM.getOpenMPRuntime().getName( 1135 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1136 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1137 Name, &CGM.getModule()); 1138 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1139 if (CGM.getLangOpts().Optimize) { 1140 Fn->removeFnAttr(llvm::Attribute::NoInline); 1141 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1142 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1143 } 1144 CodeGenFunction CGF(CGM); 1145 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1146 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1147 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1148 Out->getLocation()); 1149 CodeGenFunction::OMPPrivateScope Scope(CGF); 1150 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1151 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1152 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1153 .getAddress(CGF); 1154 }); 1155 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1156 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1157 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1158 .getAddress(CGF); 1159 }); 1160 (void)Scope.Privatize(); 1161 if (!IsCombiner && Out->hasInit() && 1162 !CGF.isTrivialInitializer(Out->getInit())) { 1163 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1164 Out->getType().getQualifiers(), 1165 /*IsInitializer=*/true); 1166 } 1167 if (CombinerInitializer) 1168 CGF.EmitIgnoredExpr(CombinerInitializer); 1169 Scope.ForceCleanup(); 1170 CGF.FinishFunction(); 1171 return Fn; 1172 } 1173 1174 void CGOpenMPRuntime::emitUserDefinedReduction( 1175 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1176 if (UDRMap.count(D) > 0) 1177 return; 1178 llvm::Function *Combiner = emitCombinerOrInitializer( 1179 CGM, D->getType(), D->getCombiner(), 1180 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1181 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1182 /*IsCombiner=*/true); 1183 llvm::Function *Initializer = nullptr; 1184 if (const Expr *Init = D->getInitializer()) { 1185 Initializer = emitCombinerOrInitializer( 1186 CGM, D->getType(), 1187 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1188 : nullptr, 1189 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1190 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1191 /*IsCombiner=*/false); 1192 } 1193 UDRMap.try_emplace(D, Combiner, Initializer); 1194 if (CGF) { 1195 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1196 Decls.second.push_back(D); 1197 } 1198 } 1199 1200 std::pair<llvm::Function *, llvm::Function *> 1201 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1202 auto I = UDRMap.find(D); 1203 if (I != UDRMap.end()) 1204 return I->second; 1205 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1206 return UDRMap.lookup(D); 1207 } 1208 1209 namespace { 1210 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1211 // Builder if one is present. 1212 struct PushAndPopStackRAII { 1213 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1214 bool HasCancel) 1215 : OMPBuilder(OMPBuilder) { 1216 if (!OMPBuilder) 1217 return; 1218 1219 // The following callback is the crucial part of clangs cleanup process. 1220 // 1221 // NOTE: 1222 // Once the OpenMPIRBuilder is used to create parallel regions (and 1223 // similar), the cancellation destination (Dest below) is determined via 1224 // IP. That means if we have variables to finalize we split the block at IP, 1225 // use the new block (=BB) as destination to build a JumpDest (via 1226 // getJumpDestInCurrentScope(BB)) which then is fed to 1227 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1228 // to push & pop an FinalizationInfo object. 
1229 // The FiniCB will still be needed but at the point where the 1230 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1231 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1232 assert(IP.getBlock()->end() == IP.getPoint() && 1233 "Clang CG should cause non-terminated block!"); 1234 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1235 CGF.Builder.restoreIP(IP); 1236 CodeGenFunction::JumpDest Dest = 1237 CGF.getOMPCancelDestination(OMPD_parallel); 1238 CGF.EmitBranchThroughCleanup(Dest); 1239 }; 1240 1241 // TODO: Remove this once we emit parallel regions through the 1242 // OpenMPIRBuilder as it can do this setup internally. 1243 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1244 {FiniCB, OMPD_parallel, HasCancel}); 1245 OMPBuilder->pushFinalizationCB(std::move(FI)); 1246 } 1247 ~PushAndPopStackRAII() { 1248 if (OMPBuilder) 1249 OMPBuilder->popFinalizationCB(); 1250 } 1251 llvm::OpenMPIRBuilder *OMPBuilder; 1252 }; 1253 } // namespace 1254 1255 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1256 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1257 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1258 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1259 assert(ThreadIDVar->getType()->isPointerType() && 1260 "thread id variable must be of type kmp_int32 *"); 1261 CodeGenFunction CGF(CGM, true); 1262 bool HasCancel = false; 1263 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1264 HasCancel = OPD->hasCancel(); 1265 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1266 HasCancel = OPD->hasCancel(); 1267 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1268 HasCancel = OPSD->hasCancel(); 1269 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1270 HasCancel = OPFD->hasCancel(); 1271 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1272 HasCancel = 
OPFD->hasCancel(); 1273 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1274 HasCancel = OPFD->hasCancel(); 1275 else if (const auto *OPFD = 1276 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1277 HasCancel = OPFD->hasCancel(); 1278 else if (const auto *OPFD = 1279 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1280 HasCancel = OPFD->hasCancel(); 1281 1282 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1283 // parallel region to make cancellation barriers work properly. 1284 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); 1285 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); 1286 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1287 HasCancel, OutlinedHelperName); 1288 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1289 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1290 } 1291 1292 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1293 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1294 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1295 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1296 return emitParallelOrTeamsOutlinedFunction( 1297 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1298 } 1299 1300 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1301 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1302 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1303 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1304 return emitParallelOrTeamsOutlinedFunction( 1305 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1306 } 1307 1308 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1309 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1310 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1311 OpenMPDirectiveKind 
InnermostKind, const RegionCodeGenTy &CodeGen, 1312 bool Tied, unsigned &NumberOfParts) { 1313 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1314 PrePostActionTy &) { 1315 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1316 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1317 llvm::Value *TaskArgs[] = { 1318 UpLoc, ThreadID, 1319 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1320 TaskTVar->getType()->castAs<PointerType>()) 1321 .getPointer(CGF)}; 1322 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1323 CGM.getModule(), OMPRTL___kmpc_omp_task), 1324 TaskArgs); 1325 }; 1326 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1327 UntiedCodeGen); 1328 CodeGen.setAction(Action); 1329 assert(!ThreadIDVar->getType()->isPointerType() && 1330 "thread id variable must be of type kmp_int32 for tasks"); 1331 const OpenMPDirectiveKind Region = 1332 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1333 : OMPD_task; 1334 const CapturedStmt *CS = D.getCapturedStmt(Region); 1335 bool HasCancel = false; 1336 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1337 HasCancel = TD->hasCancel(); 1338 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1339 HasCancel = TD->hasCancel(); 1340 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1341 HasCancel = TD->hasCancel(); 1342 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1343 HasCancel = TD->hasCancel(); 1344 1345 CodeGenFunction CGF(CGM, true); 1346 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1347 InnermostKind, HasCancel, Action); 1348 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1349 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1350 if (!Tied) 1351 NumberOfParts = Action.getNumberOfParts(); 1352 return Res; 1353 } 1354 1355 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1356 
const RecordDecl *RD, const CGRecordLayout &RL, 1357 ArrayRef<llvm::Constant *> Data) { 1358 llvm::StructType *StructTy = RL.getLLVMType(); 1359 unsigned PrevIdx = 0; 1360 ConstantInitBuilder CIBuilder(CGM); 1361 auto DI = Data.begin(); 1362 for (const FieldDecl *FD : RD->fields()) { 1363 unsigned Idx = RL.getLLVMFieldNo(FD); 1364 // Fill the alignment. 1365 for (unsigned I = PrevIdx; I < Idx; ++I) 1366 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1367 PrevIdx = Idx + 1; 1368 Fields.add(*DI); 1369 ++DI; 1370 } 1371 } 1372 1373 template <class... As> 1374 static llvm::GlobalVariable * 1375 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1376 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1377 As &&... Args) { 1378 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1379 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1380 ConstantInitBuilder CIBuilder(CGM); 1381 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1382 buildStructValue(Fields, CGM, RD, RL, Data); 1383 return Fields.finishAndCreateGlobal( 1384 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1385 std::forward<As>(Args)...); 1386 } 1387 1388 template <typename T> 1389 static void 1390 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1391 ArrayRef<llvm::Constant *> Data, 1392 T &Parent) { 1393 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1394 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1395 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1396 buildStructValue(Fields, CGM, RD, RL, Data); 1397 Fields.finishAndAddTo(Parent); 1398 } 1399 1400 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1401 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1402 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1403 FlagsTy FlagsKey(Flags, Reserved2Flags); 1404 llvm::Value *Entry = 
OpenMPDefaultLocMap.lookup(FlagsKey); 1405 if (!Entry) { 1406 if (!DefaultOpenMPPSource) { 1407 // Initialize default location for psource field of ident_t structure of 1408 // all ident_t objects. Format is ";file;function;line;column;;". 1409 // Taken from 1410 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1411 DefaultOpenMPPSource = 1412 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1413 DefaultOpenMPPSource = 1414 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1415 } 1416 1417 llvm::Constant *Data[] = { 1418 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1419 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1420 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1421 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1422 llvm::GlobalValue *DefaultOpenMPLocation = 1423 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1424 llvm::GlobalValue::PrivateLinkage); 1425 DefaultOpenMPLocation->setUnnamedAddr( 1426 llvm::GlobalValue::UnnamedAddr::Global); 1427 1428 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1429 } 1430 return Address(Entry, Align); 1431 } 1432 1433 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1434 bool AtCurrentPoint) { 1435 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1436 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1437 1438 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1439 if (AtCurrentPoint) { 1440 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1441 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1442 } else { 1443 Elem.second.ServiceInsertPt = 1444 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1445 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1446 } 1447 } 1448 1449 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1450 auto &Elem = 
OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1451 if (Elem.second.ServiceInsertPt) { 1452 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1453 Elem.second.ServiceInsertPt = nullptr; 1454 Ptr->eraseFromParent(); 1455 } 1456 } 1457 1458 static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, 1459 SourceLocation Loc, 1460 SmallString<128> &Buffer) { 1461 llvm::raw_svector_ostream OS(Buffer); 1462 // Build debug location 1463 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1464 OS << ";" << PLoc.getFilename() << ";"; 1465 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1466 OS << FD->getQualifiedNameAsString(); 1467 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1468 return OS.str(); 1469 } 1470 1471 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1472 SourceLocation Loc, 1473 unsigned Flags) { 1474 Flags |= OMP_IDENT_KMPC; 1475 // If no debug info is generated - return global default location. 1476 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1477 Loc.isInvalid()) 1478 return getOrCreateDefaultLocation(Flags).getPointer(); 1479 1480 // If the OpenMPIRBuilder is used we need to use it for all location handling 1481 // as the clang invariants used below might be broken. 
1482 if (CGM.getLangOpts().OpenMPIRBuilder) { 1483 SmallString<128> Buffer; 1484 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1485 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1486 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1487 return OMPBuilder.getOrCreateIdent(SrcLocStr, IdentFlag(Flags)); 1488 } 1489 1490 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1491 1492 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1493 Address LocValue = Address::invalid(); 1494 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1495 if (I != OpenMPLocThreadIDMap.end()) 1496 LocValue = Address(I->second.DebugLoc, Align); 1497 1498 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1499 // GetOpenMPThreadID was called before this routine. 1500 if (!LocValue.isValid()) { 1501 // Generate "ident_t .kmpc_loc.addr;" 1502 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1503 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1504 Elem.second.DebugLoc = AI.getPointer(); 1505 LocValue = AI; 1506 1507 if (!Elem.second.ServiceInsertPt) 1508 setLocThreadIdInsertPt(CGF); 1509 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1510 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1511 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1512 CGF.getTypeSize(IdentQTy)); 1513 } 1514 1515 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1516 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1517 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1518 LValue PSource = 1519 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1520 1521 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1522 if (OMPDebugLoc == nullptr) { 1523 SmallString<128> Buffer; 1524 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr( 1525 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1526 OpenMPDebugLocMap[Loc.getRawEncoding()] = 
OMPDebugLoc; 1527 } 1528 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1529 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1530 1531 // Our callers always pass this to a runtime function, so for 1532 // convenience, go ahead and return a naked pointer. 1533 return LocValue.getPointer(); 1534 } 1535 1536 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1537 SourceLocation Loc) { 1538 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1539 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as 1540 // the clang invariants used below might be broken. 1541 if (CGM.getLangOpts().OpenMPIRBuilder) { 1542 SmallString<128> Buffer; 1543 OMPBuilder.updateToLocation(CGF.Builder.saveIP()); 1544 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( 1545 getIdentStringFromSourceLocation(CGF, Loc, Buffer)); 1546 return OMPBuilder.getOrCreateThreadID( 1547 OMPBuilder.getOrCreateIdent(SrcLocStr)); 1548 } 1549 1550 llvm::Value *ThreadID = nullptr; 1551 // Check whether we've already cached a load of the thread id in this 1552 // function. 1553 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1554 if (I != OpenMPLocThreadIDMap.end()) { 1555 ThreadID = I->second.ThreadID; 1556 if (ThreadID != nullptr) 1557 return ThreadID; 1558 } 1559 // If exceptions are enabled, do not use parameter to avoid possible crash. 1560 if (auto *OMPRegionInfo = 1561 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1562 if (OMPRegionInfo->getThreadIDVariable()) { 1563 // Check if this an outlined function with thread id passed as argument. 
1564 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1565 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1566 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1567 !CGF.getLangOpts().CXXExceptions || 1568 CGF.Builder.GetInsertBlock() == TopBlock || 1569 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1570 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1571 TopBlock || 1572 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1573 CGF.Builder.GetInsertBlock()) { 1574 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1575 // If value loaded in entry block, cache it and use it everywhere in 1576 // function. 1577 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1578 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1579 Elem.second.ThreadID = ThreadID; 1580 } 1581 return ThreadID; 1582 } 1583 } 1584 } 1585 1586 // This is not an outlined function region - need to call __kmpc_int32 1587 // kmpc_global_thread_num(ident_t *loc). 1588 // Generate thread id value and cache this value for use across the 1589 // function. 
1590 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1591 if (!Elem.second.ServiceInsertPt) 1592 setLocThreadIdInsertPt(CGF); 1593 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1594 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1595 llvm::CallInst *Call = CGF.Builder.CreateCall( 1596 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1597 OMPRTL___kmpc_global_thread_num), 1598 emitUpdateLocation(CGF, Loc)); 1599 Call->setCallingConv(CGF.getRuntimeCC()); 1600 Elem.second.ThreadID = Call; 1601 return Call; 1602 } 1603 1604 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1605 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1606 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1607 clearLocThreadIdInsertPt(CGF); 1608 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1609 } 1610 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1611 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1612 UDRMap.erase(D); 1613 FunctionUDRMap.erase(CGF.CurFn); 1614 } 1615 auto I = FunctionUDMMap.find(CGF.CurFn); 1616 if (I != FunctionUDMMap.end()) { 1617 for(const auto *D : I->second) 1618 UDMMap.erase(D); 1619 FunctionUDMMap.erase(I); 1620 } 1621 LastprivateConditionalToTypes.erase(CGF.CurFn); 1622 } 1623 1624 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1625 return IdentTy->getPointerTo(); 1626 } 1627 1628 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1629 if (!Kmpc_MicroTy) { 1630 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1631 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1632 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1633 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1634 } 1635 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1636 } 1637 1638 llvm::FunctionCallee 1639 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1640 assert((IVSize == 32 || IVSize == 64) && 1641 "IV size is not compatible with the omp runtime"); 1642 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1643 : "__kmpc_for_static_init_4u") 1644 : (IVSigned ? "__kmpc_for_static_init_8" 1645 : "__kmpc_for_static_init_8u"); 1646 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1647 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1648 llvm::Type *TypeParams[] = { 1649 getIdentTyPointerTy(), // loc 1650 CGM.Int32Ty, // tid 1651 CGM.Int32Ty, // schedtype 1652 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1653 PtrTy, // p_lower 1654 PtrTy, // p_upper 1655 PtrTy, // p_stride 1656 ITy, // incr 1657 ITy // chunk 1658 }; 1659 auto *FnTy = 1660 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1661 return CGM.CreateRuntimeFunction(FnTy, Name); 1662 } 1663 1664 llvm::FunctionCallee 1665 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1666 assert((IVSize == 32 || IVSize == 64) && 1667 "IV size is not compatible with the omp runtime"); 1668 StringRef Name = 1669 IVSize == 32 1670 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1671 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1672 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1673 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1674 CGM.Int32Ty, // tid 1675 CGM.Int32Ty, // schedtype 1676 ITy, // lower 1677 ITy, // upper 1678 ITy, // stride 1679 ITy // chunk 1680 }; 1681 auto *FnTy = 1682 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1683 return CGM.CreateRuntimeFunction(FnTy, Name); 1684 } 1685 1686 llvm::FunctionCallee 1687 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1688 assert((IVSize == 32 || IVSize == 64) && 1689 "IV size is not compatible with the omp runtime"); 1690 StringRef Name = 1691 IVSize == 32 1692 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1693 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1694 llvm::Type *TypeParams[] = { 1695 getIdentTyPointerTy(), // loc 1696 CGM.Int32Ty, // tid 1697 }; 1698 auto *FnTy = 1699 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1700 return CGM.CreateRuntimeFunction(FnTy, Name); 1701 } 1702 1703 llvm::FunctionCallee 1704 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1705 assert((IVSize == 32 || IVSize == 64) && 1706 "IV size is not compatible with the omp runtime"); 1707 StringRef Name = 1708 IVSize == 32 1709 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1710 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1711 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1712 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1713 llvm::Type *TypeParams[] = { 1714 getIdentTyPointerTy(), // loc 1715 CGM.Int32Ty, // tid 1716 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1717 PtrTy, // p_lower 1718 PtrTy, // p_upper 1719 PtrTy // p_stride 1720 }; 1721 auto *FnTy = 1722 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1723 return CGM.CreateRuntimeFunction(FnTy, Name); 1724 } 1725 1726 /// Obtain information that uniquely identifies a target entry. This 1727 /// consists of the file and device IDs as well as line number associated with 1728 /// the relevant entry source location. 1729 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 1730 unsigned &DeviceID, unsigned &FileID, 1731 unsigned &LineNum) { 1732 SourceManager &SM = C.getSourceManager(); 1733 1734 // The loc should be always valid and have a file ID (the user cannot use 1735 // #pragma directives in macros) 1736 1737 assert(Loc.isValid() && "Source location is expected to be always valid."); 1738 1739 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 1740 assert(PLoc.isValid() && "Source location is expected to be always valid."); 1741 1742 llvm::sys::fs::UniqueID ID; 1743 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 1744 SM.getDiagnostics().Report(diag::err_cannot_open_file) 1745 << PLoc.getFilename() << EC.message(); 1746 1747 DeviceID = ID.getDevice(); 1748 FileID = ID.getFile(); 1749 LineNum = PLoc.getLine(); 1750 } 1751 1752 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 1753 if (CGM.getLangOpts().OpenMPSimd) 1754 return Address::invalid(); 1755 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1756 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1757 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 1758 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1759 HasRequiresUnifiedSharedMemory))) { 1760 SmallString<64> PtrName; 1761 { 1762 
llvm::raw_svector_ostream OS(PtrName);
      // Name: <mangled name>[_<file id in hex>]_decl_tgt_ref_ptr.
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Variables that are not externally visible also get the file ID of
        // their canonical declaration (presumably to keep equally named
        // variables from different files apart).
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Reuse the reference pointer if the module already has one.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the host side initializes the pointer with the variable's
      // address.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Return the internal variable, named after the mangled name of \p VD plus
/// the "cache" suffix, that serves as the cache argument of
/// __kmpc_threadprivate_cached. Must not be called when threadprivate
/// variables are implemented with TLS (asserted).
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
1796 std::string Suffix = getName({"cache", ""}); 1797 return getOrCreateInternalVariable( 1798 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1799 } 1800 1801 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1802 const VarDecl *VD, 1803 Address VDAddr, 1804 SourceLocation Loc) { 1805 if (CGM.getLangOpts().OpenMPUseTLS && 1806 CGM.getContext().getTargetInfo().isTLSSupported()) 1807 return VDAddr; 1808 1809 llvm::Type *VarTy = VDAddr.getElementType(); 1810 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1811 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1812 CGM.Int8PtrTy), 1813 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1814 getOrCreateThreadPrivateCache(VD)}; 1815 return Address(CGF.EmitRuntimeCall( 1816 OMPBuilder.getOrCreateRuntimeFunction( 1817 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1818 Args), 1819 VDAddr.getAlignment()); 1820 } 1821 1822 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1823 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1824 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1825 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1826 // library. 1827 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1828 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1829 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1830 OMPLoc); 1831 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1832 // to register constructor/destructor for variable. 
llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

/// Emit the constructor/destructor helpers for the threadprivate variable
/// \p VD and the code that registers them with the runtime.
///
/// Nothing is emitted when TLS is used, when \p VD has no definition, when the
/// variable was already handled, or when neither a constructor nor a
/// destructor is needed.
///
/// \param VD          The threadprivate variable.
/// \param VDAddr      Address of the original variable.
/// \param Loc         Location used for the generated functions and calls.
/// \param PerformInit Whether the initializer must be re-run for each
///                    thread's copy (only honoured for C++).
/// \param CGF         If non-null, the registration is emitted into this
///                    function; if null, a standalone init function is
///                    created.
/// \returns The standalone init function when one was created, otherwise
///          nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insert() succeeds only the first time this mangled name is seen, so
  // each variable is processed at most once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // Signature: void *ctor(void *dst).
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and view it as a pointer to the
      // variable's type.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg =
CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the variable's initializer on the destination storage.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The constructor helper returns the destination pointer it was given.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      // Signature: void dtor(void *dst).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      // Destroy the object the argument points to.
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of ctor/dtor was not generated is passed as a typed null
    // function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration in its own
      // nullary "__omp_threadprivate_init_" function and return it.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emit and register the offload constructor/destructor entries for the
/// declare target variable \p VD stored at \p Addr.
///
/// \param PerformInit Whether a constructor entry is needed (only honoured
///                    for C++).
/// \returns false when no offload targets are configured and this is not a
///          device compilation; otherwise the value of the OpenMPIsDevice
///          language option.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // Nothing to emit for variables that are not declare target, are mapped
  // with 'link', or are mapped with 'to' under unified shared memory.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable (by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    // Prefix layout: __omp_offloading__<device>_<file>_<name>_l<line>, with
    // the device and file IDs printed in hex.
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the storage of the declare target variable VD (Addr).
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL
= ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the constructor as used so it is not discarded as unreferenced.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: only a private, constant i8 placeholder global named
      // "<prefix>_ctor" is created; it doubles as the entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the declare target
      // variable VD (Addr).
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Mark the destructor as used so it is not discarded as unreferenced.
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: only a private, constant i8 placeholder global named
      // "<prefix>_dtor" is created; it doubles as the entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Return the address of a compiler-generated ("artificial") threadprivate
/// variable of type \p VarType whose name is \p Name plus the "artificial"
/// suffix.
///
/// With TLS the backing global itself is made thread-local and returned;
/// otherwise the per-thread copy is obtained from
/// __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // Arguments of __kmpc_threadprivate_cached:
  // (loc, gtid, &var, sizeof(var), &cache).
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // Cast the void* returned by the runtime back to a pointer to the
  // variable's type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit code for an OpenMP 'if' clause: run \p ThenGen when \p Cond is true
/// and \p ElseGen otherwise.
///
/// If \p Cond folds to a constant only the selected arm is emitted; otherwise
/// a conditional branch over the blocks "omp_if.then", "omp_if.else" and
/// "omp_if.end" is generated.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty is a discarded temporary
  // and is destroyed at the end of this statement; if it is a scoped guard,
  // its effect may not reach the code below - confirm.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit the code that runs the outlined 'parallel' region \p OutlinedFn.
///
/// Without an 'if' clause, or when its condition is true, the region is
/// launched through __kmpc_fork_call. When the condition is false the region
/// is executed by the encountering thread between
/// __kmpc_serialized_parallel and __kmpc_end_serialized_parallel.
///
/// \param CGF          Function in which the call is emitted; nothing is
///                     emitted if it has no insert point.
/// \param Loc          Location used for the runtime calls.
/// \param OutlinedFn   The outlined parallel region.
/// \param CapturedVars Values forwarded to \p OutlinedFn after the two
///                     thread-id arguments.
/// \param IfCond       Condition of the 'if' clause, or null if absent.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // The captured variables follow the three fixed arguments.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: the region is always launched with __kmpc_fork_call.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temp.
2215 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2216 SourceLocation Loc) { 2217 if (auto *OMPRegionInfo = 2218 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2219 if (OMPRegionInfo->getThreadIDVariable()) 2220 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2221 2222 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2223 QualType Int32Ty = 2224 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2225 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2226 CGF.EmitStoreOfScalar(ThreadID, 2227 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2228 2229 return ThreadIDTemp; 2230 } 2231 2232 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2233 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2234 SmallString<256> Buffer; 2235 llvm::raw_svector_ostream Out(Buffer); 2236 Out << Name; 2237 StringRef RuntimeName = Out.str(); 2238 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2239 if (Elem.second) { 2240 assert(Elem.second->getType()->getPointerElementType() == Ty && 2241 "OMP internal variable has different type than requested"); 2242 return &*Elem.second; 2243 } 2244 2245 return Elem.second = new llvm::GlobalVariable( 2246 CGM.getModule(), Ty, /*IsConstant*/ false, 2247 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2248 Elem.first(), /*InsertBefore=*/nullptr, 2249 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2250 } 2251 2252 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2253 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2254 std::string Name = getName({Prefix, "var"}); 2255 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2256 } 2257 2258 namespace { 2259 /// Common pre(post)-action for different OpenMP constructs. 
2260 class CommonActionTy final : public PrePostActionTy { 2261 llvm::FunctionCallee EnterCallee; 2262 ArrayRef<llvm::Value *> EnterArgs; 2263 llvm::FunctionCallee ExitCallee; 2264 ArrayRef<llvm::Value *> ExitArgs; 2265 bool Conditional; 2266 llvm::BasicBlock *ContBlock = nullptr; 2267 2268 public: 2269 CommonActionTy(llvm::FunctionCallee EnterCallee, 2270 ArrayRef<llvm::Value *> EnterArgs, 2271 llvm::FunctionCallee ExitCallee, 2272 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 2273 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 2274 ExitArgs(ExitArgs), Conditional(Conditional) {} 2275 void Enter(CodeGenFunction &CGF) override { 2276 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 2277 if (Conditional) { 2278 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 2279 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2280 ContBlock = CGF.createBasicBlock("omp_if.end"); 2281 // Generate the branch (If-stmt) 2282 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 2283 CGF.EmitBlock(ThenBlock); 2284 } 2285 } 2286 void Done(CodeGenFunction &CGF) { 2287 // Emit the rest of blocks/branches 2288 CGF.EmitBranch(ContBlock); 2289 CGF.EmitBlock(ContBlock, true); 2290 } 2291 void Exit(CodeGenFunction &CGF) override { 2292 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 2293 } 2294 }; 2295 } // anonymous namespace 2296 2297 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 2298 StringRef CriticalName, 2299 const RegionCodeGenTy &CriticalOpGen, 2300 SourceLocation Loc, const Expr *Hint) { 2301 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 2302 // CriticalOpGen(); 2303 // __kmpc_end_critical(ident_t *, gtid, Lock); 2304 // Prepare arguments and build a call to __kmpc_critical 2305 if (!CGF.HaveInsertPoint()) 2306 return; 2307 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2308 getCriticalRegionLock(CriticalName)}; 2309 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  // Only the enter call takes the extra hint argument, cast to a 32-bit
  // integer.
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: \p MasterOpGen runs, followed by
/// __kmpc_end_master, only if __kmpc_master returns non-zero. Nothing is
/// emitted if \p CGF has no insert point.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional block opened by the action's Enter().
  Action.Done(CGF);
}

/// Emit a 'taskyield': through the OpenMPIRBuilder when that is enabled,
/// otherwise as a call to __kmpc_omp_taskyield. Nothing is emitted if \p CGF
/// has no insert point.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  // When emitted inside an OpenMP region, let that region emit its
  // untied-task switch after the yield.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region: \p TaskgroupOpGen bracketed by
/// __kmpc_taskgroup and __kmpc_end_taskgroup. Nothing is emitted if \p CGF
/// has no insert point.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// \param Array Address of the array of pointers.
/// \param Index Position of the wanted pointer in \p Array.
/// \param Var   Variable the pointer refers to; supplies the alignment and
///              element type of the returned address.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-wrap the loaded pointer with the declared alignment of Var and cast
  // it to Var's in-memory type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Build the internal helper 'void copy_func(void *LHSArg, void *RHSArg)'
/// used for 'copyprivate'.
///
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers.
/// For every index I in [0, AssignmentOps.size()) the helper copies element I
/// of the RHS array into element I of the LHS array via EmitOMPCopy, using
/// AssignmentOps[I] as the copy expression.
///
/// \param ArgsType Pointer type both 'void *' arguments are cast to.
/// \param CopyprivateVars Copyprivate variable references; only their types
///        are used here.
/// \param DestExprs DeclRefExprs naming the destination helper variables.
/// \param SrcExprs DeclRefExprs naming the source helper variables.
/// \param AssignmentOps Per-variable assignment expressions.
/// \param Loc Location used for the implicit parameters and function body.
/// \returns The newly created function (internal linkage, marked norecurse).
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The helper body is generated with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copied type comes from the original copyprivate variable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit an OpenMP 'single' region, optionally followed by the 'copyprivate'
/// broadcast.
///
/// The region body is emitted as an inlined OMPD_single directive guarded by
/// a conditional CommonActionTy built from __kmpc_single / __kmpc_end_single.
/// When \p CopyprivateVars is non-empty, a 'did_it' flag is maintained and a
/// call to __kmpc_copyprivate is emitted after the region. Does nothing if
/// \p CGF has no insert point.
///
/// \param SingleOpGen Generator for the body of the single region.
/// \param CopyprivateVars Variables listed in 'copyprivate' clauses.
/// \param SrcExprs, DstExprs, AssignmentOps Per-variable helper expressions;
///        all four arrays must have the same size (asserted).
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // what enables the copyprivate code below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  // Finish the conditional action only after the did_it store has been
  // emitted.
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
// NOTE(review): the enclosing function's SrcExprs/DstExprs are forwarded
    // positionally into emitCopyprivateCopyFunction's DestExprs/SrcExprs
    // parameters (SrcExprs -> DestExprs, DstExprs -> SrcExprs). Presumably the
    // parameter names of the enclosing definition are swapped relative to the
    // declaration — confirm against the header before renaming anything.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit an OpenMP 'ordered' region.
///
/// When \p IsThreads is true, the body is emitted with a CommonActionTy built
/// from __kmpc_ordered / __kmpc_end_ordered; otherwise the body is emitted as
/// a plain inlined OMPD_ordered directive with no runtime calls added here.
/// Does nothing if \p CGF has no insert point.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Select the ident_t barrier flag for the directive kind \p Kind.
///
/// 'for', 'sections' and 'single' map to their dedicated implicit-barrier
/// flags, 'barrier' maps to the explicit-barrier flag, and every other kind
/// maps to the generic implicit-barrier flag.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Override the schedule for doacross loops.
///
/// If \p S carries an 'ordered' clause with a non-zero loop count,
/// \p ScheduleKind is set to static and \p ChunkExpr to an unsigned 32-bit
/// literal 1. Otherwise both out-parameters are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier.
///
/// With the OpenMPIRBuilder language option the barrier is delegated to
/// OMPBuilder.CreateBarrier. Otherwise, inside an OpenMP region that has
/// 'cancel' (and unless \p ForceSimpleCall is set) a call to
/// __kmpc_cancel_barrier is emitted and, if \p EmitChecks is set, a non-zero
/// result branches to the cancel destination of the enclosing directive. In
/// all remaining cases a plain __kmpc_barrier call is emitted.
///
/// \param Kind Directive kind used to pick the ident_t barrier flags.
/// \param EmitChecks Emit the cancellation check after the cancel barrier.
/// \param ForceSimpleCall Always use __kmpc_barrier, never the cancel variant.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      // The cancel barrier replaces the plain barrier; do not emit both.
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
///
/// Only 'static' distinguishes chunked from non-chunked; dynamic and guided
/// always map to their '_chunked' enumerators. An unknown schedule kind maps
/// to static and must not be chunked (asserted).
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
///
/// \p ScheduleKind is not inspected; the result depends on \p Chunked only.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// Return true if the (non-ordered) loop schedule maps to OMP_sch_static,
/// i.e. a static or unknown schedule kind without a chunk.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// Return true if the distribute schedule maps to OMP_dist_sch_static,
/// i.e. no chunk was specified.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// Return true if the (non-ordered) loop schedule maps to
/// OMP_sch_static_chunked, i.e. a static schedule kind with a chunk.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// Return true if the distribute schedule maps to
/// OMP_dist_sch_static_chunked, i.e. a chunk was specified.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// Return true if \p ScheduleKind does not map to OMP_sch_static when
/// queried as non-chunked and non-ordered, i.e. for every kind other than
/// static and unknown.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Combine a runtime schedule with the monotonic/nonmonotonic modifier bits.
///
/// \p M1 and \p M2 are processed in that order, so a monotonicity modifier in
/// \p M2 overrides one in \p M1. A 'simd' modifier turns
/// OMP_sch_static_chunked into OMP_sch_static_balanced_chunked.
///
/// \returns \p Schedule (possibly adjusted) OR-ed with the selected modifier.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
// Only applies for OpenMP >= 5.0 and when no explicit monotonicity
  // modifier was given: every schedule outside the static family listed
  // below gets the nonmonotonic modifier.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

/// Emit the dispatch-init runtime call for a dynamically scheduled (or
/// ordered) loop.
///
/// The runtime function is chosen by createDispatchInitFunction from
/// \p IVSize and \p IVSigned. The stride argument is always the constant 1
/// and the chunk defaults to 1 when DispatchValues.Chunk is null. Does
/// nothing if \p CGF has no insert point.
///
/// \param ScheduleKind Schedule kind plus its two modifiers.
/// \param Ordered True if the loop has an 'ordered' clause; static schedules
///        are only accepted here when this is set (asserted).
/// \param DispatchValues Lower bound, upper bound and optional chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper that emits the static-init runtime call for worksharing and
/// distribute loops.
///
/// \param UpdateLocation Already-emitted ident_t argument.
/// \param ThreadId Already-emitted global thread id argument.
/// \param ForStaticInitFunction Runtime entry point to call.
/// \param Schedule Runtime schedule; must be one of the static variants
///        (asserted).
/// \param M1, M2 Schedule modifiers folded into the schedule argument.
/// \param Values Addresses of the loop control variables plus the optional
///        chunk; Values.Ordered must be false (asserted).
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init runtime call for a worksharing loop or sections
/// construct.
///
/// The ident_t is tagged OMP_IDENT_WORK_LOOP for loop directives and
/// OMP_IDENT_WORK_SECTIONS otherwise. The call itself is emitted under an
/// artificial debug location.
///
/// \param DKind Directive kind; must be a worksharing directive (asserted).
/// \param ScheduleKind Schedule kind plus its two modifiers.
/// \param Values Loop control addresses, IV size/signedness and chunk.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init runtime call for a 'distribute' loop.
///
/// Uses an ident_t tagged OMP_IDENT_WORK_DISTRIBUTE and passes no schedule
/// modifiers (both are OMPC_SCHEDULE_MODIFIER_unknown).
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the __kmpc_for_static_fini call that ends a statically scheduled
/// construct.
///
/// The ident_t work flag is chosen from \p DKind: distribute, then loop,
/// otherwise sections. Does nothing if \p CGF has no insert point.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                      Args);
}

/// Emit the runtime call that ends an iteration of an ordered loop.
///
/// The callee is obtained from createDispatchFiniFunction(IVSize, IVSigned).
/// Does nothing if \p CGF has no insert point.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  // NOTE(review): the callee comes from createDispatchFiniFunction, so the
  // runtime entry name quoted above may be stale — confirm.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit the dispatch-next runtime call that fetches the next chunk of a
/// dynamically scheduled loop.
///
/// \param IL, LB, UB, ST Addresses passed as the last-iteration flag, lower
///        bound, upper bound and stride out-parameters.
/// \returns The call result converted from a signed 32-bit integer to bool.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit the __kmpc_push_num_threads call for a 'num_threads' clause.
///
/// \p NumThreads is cast (as signed) to a 32-bit integer before being
/// passed. Does nothing if \p CGF has no insert point.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

/// Emit the __kmpc_push_proc_bind call for a 'proc_bind' clause.
///
/// \param ProcBind Binding kind; must not be OMP_PROC_BIND_unknown
///        (asserted). It is passed as an integer constant of type CGM.IntTy.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

/// Emit a 'flush'.
///
/// With the OpenMPIRBuilder language option this delegates to
/// OMPBuilder.CreateFlush; otherwise it emits a call to __kmpc_flush(loc).
/// The unnamed expression-list parameter is ignored.
/// NOTE(review): \p AO is not referenced in this body — confirm that the
/// requested atomic ordering is intentionally unused here.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
KmpTaskTReductions,
};
} // anonymous namespace

/// Return true if neither target region entries nor device global variable
/// entries have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
///
/// Stores a placeholder entry (null address and ID, flags
/// OMPTargetRegionEntryTargetRegion) with the given \p Order under the
/// (DeviceID, FileID, ParentName, LineNum) key and bumps the entry counter.
/// Only valid during device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register the address, ID and flags of a target region entry.
///
/// \param DeviceID, FileID, ParentName, LineNum Key identifying the region.
/// \param Addr Address constant recorded for the entry.
/// \param ID ID constant recorded for the entry.
/// \param Flags Kind flags recorded for the entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the entry must already exist and still be unregistered;
    // otherwise report an error and leave the table untouched.
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // Host side: create the entry, using the running counter as its order.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true if a target region entry exists for the given key and has
/// not been registered yet (neither its address nor its ID is set).
///
/// Returns false if any level of the (DeviceID, FileID, ParentName, LineNum)
/// lookup misses. The lookup uses find() at every level, so it never inserts
/// into the table.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

/// Invoke \p Action on every recorded target region entry, passing the
/// device ID, file ID, parent name, line number and the entry itself.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
// Nesting order: device -> file -> parent function name -> line.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Initialize a device global variable entry.
///
/// Records (\p Order, \p Flags) under \p Name if no entry with that name
/// exists yet (try_emplace) and bumps the entry counter. Only valid during
/// device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register the address, size and linkage of a device global variable entry.
///
/// On the device the entry is expected to have been initialized already
/// (asserted) and is filled in. On the host an existing entry only has its
/// size and linkage set if its size is still zero; otherwise a new entry is
/// created.
///
/// \param VarName Name used as the key of the entry.
/// \param Addr Address of the variable.
/// \param VarSize Size of the variable.
/// \param Flags Declare-target kind; must match an existing entry's flags.
/// \param Linkage Linkage recorded for the entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // Address already recorded: only fill in size/linkage if still missing.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action on every recorded device global variable entry, passing
/// the entry's key (name) and value.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Create one offload entry global for \p Addr.
///
/// Emits an internal constant string holding the name of \p Addr, then a
/// constant struct of type getTgtOffloadEntryQTy() initialized with
/// {ID, name string, Size, Flags, 0}, and places that struct in the
/// "omp_offloading_entries" section.
///
/// \param ID Constant stored (bit-cast to void*) as the first field.
/// \param Addr Constant whose name is used for the entry.
/// \param Size Value stored in the size field.
/// \param Flags Value stored in the flags field.
/// \param Linkage NOTE(review): not referenced in this function; the entry
///        is always created with WeakAnyLinkage — confirm this is intended.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
Entry->setSection("omp_offloading_entries");
}

/// Emit the "omp_offload.info" named metadata and the offload entry globals
/// for every recorded target region and device global variable.
///
/// Entries are first collected into an array indexed by their creation
/// order, then walked in that order to create the offload entries. Returns
/// early in simd-only mode or when no entries were recorded.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // One slot per entry, indexed by the entry's order:
  // (entry, source location, name).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  // Parent function name per target region entry, indexed the same way.
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
// - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by searching the source
        // manager for the file whose unique ID matches (DeviceID, FileID).
        // Loc stays default-constructed if no file matches.
        // Note: the iterator named E below shadows the lambda parameter E
        // for the duration of this for statement only.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
3265 // - Entry 3 -> Order the entry was created. 3266 // The first element of the metadata node is the kind. 3267 llvm::Metadata *Ops[] = { 3268 GetMDInt(E.getKind()), GetMDString(MangledName), 3269 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3270 3271 // Save this entry in the right position of the ordered entries array. 3272 OrderedEntries[E.getOrder()] = 3273 std::make_tuple(&E, SourceLocation(), MangledName); 3274 3275 // Add metadata to the named metadata node. 3276 MD->addOperand(llvm::MDNode::get(C, Ops)); 3277 }; 3278 3279 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3280 DeviceGlobalVarMetadataEmitter); 3281 3282 for (const auto &E : OrderedEntries) { 3283 assert(std::get<0>(E) && "All ordered entries must exist!"); 3284 if (const auto *CE = 3285 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3286 std::get<0>(E))) { 3287 if (!CE->getID() || !CE->getAddress()) { 3288 // Do not blame the entry if the parent funtion is not emitted. 3289 StringRef FnName = ParentFunctions[CE->getOrder()]; 3290 if (!CGM.GetGlobalValue(FnName)) 3291 continue; 3292 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3293 DiagnosticsEngine::Error, 3294 "Offloading entry for target region in %0 is incorrect: either the " 3295 "address or the ID is invalid."); 3296 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3297 continue; 3298 } 3299 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3300 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3301 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3302 OffloadEntryInfoDeviceGlobalVar>( 3303 std::get<0>(E))) { 3304 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3305 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3306 CE->getFlags()); 3307 switch (Flags) { 3308 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3309 if (CGM.getLangOpts().OpenMPIsDevice && 3310 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3311 continue; 3312 if (!CE->getAddress()) { 3313 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3314 DiagnosticsEngine::Error, "Offloading entry for declare target " 3315 "variable %0 is incorrect: the " 3316 "address is invalid."); 3317 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3318 continue; 3319 } 3320 // The vaiable has no definition - no need to add the entry. 3321 if (CE->getVarSize().isZero()) 3322 continue; 3323 break; 3324 } 3325 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3326 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3327 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3328 "Declaret target link address is set."); 3329 if (CGM.getLangOpts().OpenMPIsDevice) 3330 continue; 3331 if (!CE->getAddress()) { 3332 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3333 DiagnosticsEngine::Error, 3334 "Offloading entry for declare target variable is incorrect: the " 3335 "address is invalid."); 3336 CGM.getDiags().Report(DiagID); 3337 continue; 3338 } 3339 break; 3340 } 3341 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3342 CE->getVarSize().getQuantity(), Flags, 3343 CE->getLinkage()); 3344 } else { 3345 llvm_unreachable("Unsupported entry kind."); 3346 } 3347 } 3348 } 3349 3350 /// Loads all the offload entries information from the host IR 3351 /// metadata. 3352 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3353 // If we are in target mode, load the metadata from the host IR. This code has 3354 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3355 3356 if (!CGM.getLangOpts().OpenMPIsDevice) 3357 return; 3358 3359 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3360 return; 3361 3362 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3363 if (auto EC = Buf.getError()) { 3364 CGM.getDiags().Report(diag::err_cannot_open_file) 3365 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3366 return; 3367 } 3368 3369 llvm::LLVMContext C; 3370 auto ME = expectedToErrorOrAndEmitErrors( 3371 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3372 3373 if (auto EC = ME.getError()) { 3374 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3375 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3376 CGM.getDiags().Report(DiagID) 3377 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3378 return; 3379 } 3380 3381 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3382 if (!MD) 3383 return; 3384 3385 for (llvm::MDNode *MN : MD->operands()) { 3386 auto &&GetMDInt = [MN](unsigned Idx) { 3387 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3388 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3389 }; 3390 3391 auto &&GetMDString = [MN](unsigned Idx) { 3392 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3393 return V->getString(); 3394 }; 3395 3396 switch (GetMDInt(0)) { 3397 default: 3398 llvm_unreachable("Unexpected metadata!"); 3399 break; 3400 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3401 OffloadingEntryInfoTargetRegion: 3402 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3403 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3404 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3405 /*Order=*/GetMDInt(5)); 3406 break; 3407 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3408 OffloadingEntryInfoDeviceGlobalVar: 3409 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3410 /*MangledName=*/GetMDString(1), 3411 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3412 /*Flags=*/GetMDInt(2)), 3413 /*Order=*/GetMDInt(3)); 3414 break; 3415 } 3416 } 3417 } 3418 3419 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3420 if (!KmpRoutineEntryPtrTy) { 3421 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3422 ASTContext &C = CGM.getContext(); 3423 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3424 FunctionProtoType::ExtProtoInfo EPI; 3425 KmpRoutineEntryPtrQTy = C.getPointerType( 3426 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3427 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3428 } 3429 } 3430 3431 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3432 // Make sure the type of the entry is already created. This is the type we 3433 // have to create: 3434 // struct __tgt_offload_entry{ 3435 // void *addr; // Pointer to the offload entry info. 3436 // // (function or global) 3437 // char *name; // Name of the function or global. 3438 // size_t size; // Size of the entry info (0 if it a function). 3439 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3440 // int32_t reserved; // Reserved, to use by the runtime library. 
3441 // }; 3442 if (TgtOffloadEntryQTy.isNull()) { 3443 ASTContext &C = CGM.getContext(); 3444 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3445 RD->startDefinition(); 3446 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3447 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3448 addFieldToRecordDecl(C, RD, C.getSizeType()); 3449 addFieldToRecordDecl( 3450 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3451 addFieldToRecordDecl( 3452 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3453 RD->completeDefinition(); 3454 RD->addAttr(PackedAttr::CreateImplicit(C)); 3455 TgtOffloadEntryQTy = C.getRecordType(RD); 3456 } 3457 return TgtOffloadEntryQTy; 3458 } 3459 3460 namespace { 3461 struct PrivateHelpersTy { 3462 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3463 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3464 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3465 PrivateElemInit(PrivateElemInit) {} 3466 const Expr *OriginalRef = nullptr; 3467 const VarDecl *Original = nullptr; 3468 const VarDecl *PrivateCopy = nullptr; 3469 const VarDecl *PrivateElemInit = nullptr; 3470 }; 3471 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3472 } // anonymous namespace 3473 3474 static RecordDecl * 3475 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3476 if (!Privates.empty()) { 3477 ASTContext &C = CGM.getContext(); 3478 // Build struct .kmp_privates_t. 
{ 3479 // /* private vars */ 3480 // }; 3481 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3482 RD->startDefinition(); 3483 for (const auto &Pair : Privates) { 3484 const VarDecl *VD = Pair.second.Original; 3485 QualType Type = VD->getType().getNonReferenceType(); 3486 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3487 if (VD->hasAttrs()) { 3488 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3489 E(VD->getAttrs().end()); 3490 I != E; ++I) 3491 FD->addAttr(*I); 3492 } 3493 } 3494 RD->completeDefinition(); 3495 return RD; 3496 } 3497 return nullptr; 3498 } 3499 3500 static RecordDecl * 3501 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3502 QualType KmpInt32Ty, 3503 QualType KmpRoutineEntryPointerQTy) { 3504 ASTContext &C = CGM.getContext(); 3505 // Build struct kmp_task_t { 3506 // void * shareds; 3507 // kmp_routine_entry_t routine; 3508 // kmp_int32 part_id; 3509 // kmp_cmplrdata_t data1; 3510 // kmp_cmplrdata_t data2; 3511 // For taskloops additional fields: 3512 // kmp_uint64 lb; 3513 // kmp_uint64 ub; 3514 // kmp_int64 st; 3515 // kmp_int32 liter; 3516 // void * reductions; 3517 // }; 3518 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3519 UD->startDefinition(); 3520 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3521 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3522 UD->completeDefinition(); 3523 QualType KmpCmplrdataTy = C.getRecordType(UD); 3524 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3525 RD->startDefinition(); 3526 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3527 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3528 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3529 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3530 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3531 if (isOpenMPTaskLoopDirective(Kind)) { 3532 QualType KmpUInt64Ty = 3533 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3534 QualType KmpInt64Ty = 3535 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3536 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3537 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3538 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3539 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3540 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3541 } 3542 RD->completeDefinition(); 3543 return RD; 3544 } 3545 3546 static RecordDecl * 3547 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3548 ArrayRef<PrivateDataTy> Privates) { 3549 ASTContext &C = CGM.getContext(); 3550 // Build struct kmp_task_t_with_privates { 3551 // kmp_task_t task_data; 3552 // .kmp_privates_t. privates; 3553 // }; 3554 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3555 RD->startDefinition(); 3556 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3557 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3558 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3559 RD->completeDefinition(); 3560 return RD; 3561 } 3562 3563 /// Emit a proxy function which accepts kmp_task_t as the second 3564 /// argument. 
3565 /// \code 3566 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3567 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3568 /// For taskloops: 3569 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3570 /// tt->reductions, tt->shareds); 3571 /// return 0; 3572 /// } 3573 /// \endcode 3574 static llvm::Function * 3575 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3576 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3577 QualType KmpTaskTWithPrivatesPtrQTy, 3578 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3579 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3580 llvm::Value *TaskPrivatesMap) { 3581 ASTContext &C = CGM.getContext(); 3582 FunctionArgList Args; 3583 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3584 ImplicitParamDecl::Other); 3585 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3586 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3587 ImplicitParamDecl::Other); 3588 Args.push_back(&GtidArg); 3589 Args.push_back(&TaskTypeArg); 3590 const auto &TaskEntryFnInfo = 3591 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3592 llvm::FunctionType *TaskEntryTy = 3593 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3594 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3595 auto *TaskEntry = llvm::Function::Create( 3596 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3597 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3598 TaskEntry->setDoesNotRecurse(); 3599 CodeGenFunction CGF(CGM); 3600 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3601 Loc, Loc); 3602 3603 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3604 // tt, 3605 // For taskloops: 3606 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3607 // 
tt->task_data.shareds); 3608 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3609 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3610 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3611 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3612 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3613 const auto *KmpTaskTWithPrivatesQTyRD = 3614 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3615 LValue Base = 3616 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3617 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3618 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3619 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3620 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3621 3622 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3623 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3624 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3625 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3626 CGF.ConvertTypeForMem(SharedsPtrTy)); 3627 3628 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3629 llvm::Value *PrivatesParam; 3630 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3631 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3632 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3633 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3634 } else { 3635 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3636 } 3637 3638 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3639 TaskPrivatesMap, 3640 CGF.Builder 3641 .CreatePointerBitCastOrAddrSpaceCast( 3642 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3643 .getPointer()}; 3644 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3645 std::end(CommonArgs)); 3646 if (isOpenMPTaskLoopDirective(Kind)) { 3647 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3648 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3649 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3650 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3651 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3652 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3653 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3654 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3655 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3656 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3657 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3658 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3659 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3660 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3661 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3662 CallArgs.push_back(LBParam); 3663 CallArgs.push_back(UBParam); 3664 CallArgs.push_back(StParam); 3665 CallArgs.push_back(LIParam); 3666 CallArgs.push_back(RParam); 3667 } 3668 CallArgs.push_back(SharedsParam); 3669 3670 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3671 CallArgs); 3672 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3673 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3674 CGF.FinishFunction(); 3675 return TaskEntry; 3676 } 3677 3678 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3679 SourceLocation Loc, 3680 QualType KmpInt32Ty, 3681 QualType KmpTaskTWithPrivatesPtrQTy, 3682 QualType KmpTaskTWithPrivatesQTy) { 3683 ASTContext &C = CGM.getContext(); 3684 FunctionArgList Args; 3685 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3686 ImplicitParamDecl::Other); 3687 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3688 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3689 
ImplicitParamDecl::Other); 3690 Args.push_back(&GtidArg); 3691 Args.push_back(&TaskTypeArg); 3692 const auto &DestructorFnInfo = 3693 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3694 llvm::FunctionType *DestructorFnTy = 3695 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3696 std::string Name = 3697 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3698 auto *DestructorFn = 3699 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3700 Name, &CGM.getModule()); 3701 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3702 DestructorFnInfo); 3703 DestructorFn->setDoesNotRecurse(); 3704 CodeGenFunction CGF(CGM); 3705 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3706 Args, Loc, Loc); 3707 3708 LValue Base = CGF.EmitLoadOfPointerLValue( 3709 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3710 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3711 const auto *KmpTaskTWithPrivatesQTyRD = 3712 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3713 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3714 Base = CGF.EmitLValueForField(Base, *FI); 3715 for (const auto *Field : 3716 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3717 if (QualType::DestructionKind DtorKind = 3718 Field->getType().isDestructedType()) { 3719 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3720 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3721 } 3722 } 3723 CGF.FinishFunction(); 3724 return DestructorFn; 3725 } 3726 3727 /// Emit a privates mapping function for correct handling of private and 3728 /// firstprivate variables. 3729 /// \code 3730 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3731 /// **noalias priv1,..., <tyn> **noalias privn) { 3732 /// *priv1 = &.privates.priv1; 3733 /// ...; 3734 /// *privn = &.privates.privn; 3735 /// } 3736 /// \endcode 3737 static llvm::Value * 3738 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3739 ArrayRef<const Expr *> PrivateVars, 3740 ArrayRef<const Expr *> FirstprivateVars, 3741 ArrayRef<const Expr *> LastprivateVars, 3742 QualType PrivatesQTy, 3743 ArrayRef<PrivateDataTy> Privates) { 3744 ASTContext &C = CGM.getContext(); 3745 FunctionArgList Args; 3746 ImplicitParamDecl TaskPrivatesArg( 3747 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3748 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3749 ImplicitParamDecl::Other); 3750 Args.push_back(&TaskPrivatesArg); 3751 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 3752 unsigned Counter = 1; 3753 for (const Expr *E : PrivateVars) { 3754 Args.push_back(ImplicitParamDecl::Create( 3755 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3756 C.getPointerType(C.getPointerType(E->getType())) 3757 .withConst() 3758 .withRestrict(), 3759 ImplicitParamDecl::Other)); 3760 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3761 PrivateVarsPos[VD] = Counter; 3762 ++Counter; 3763 } 3764 for (const Expr *E : FirstprivateVars) { 3765 Args.push_back(ImplicitParamDecl::Create( 3766 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3767 C.getPointerType(C.getPointerType(E->getType())) 3768 .withConst() 3769 .withRestrict(), 3770 ImplicitParamDecl::Other)); 3771 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3772 PrivateVarsPos[VD] = Counter; 3773 ++Counter; 3774 } 3775 for (const Expr *E : LastprivateVars) { 3776 Args.push_back(ImplicitParamDecl::Create( 3777 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3778 C.getPointerType(C.getPointerType(E->getType())) 3779 .withConst() 3780 .withRestrict(), 3781 ImplicitParamDecl::Other)); 3782 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3783 
PrivateVarsPos[VD] = Counter; 3784 ++Counter; 3785 } 3786 const auto &TaskPrivatesMapFnInfo = 3787 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3788 llvm::FunctionType *TaskPrivatesMapTy = 3789 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3790 std::string Name = 3791 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3792 auto *TaskPrivatesMap = llvm::Function::Create( 3793 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3794 &CGM.getModule()); 3795 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3796 TaskPrivatesMapFnInfo); 3797 if (CGM.getLangOpts().Optimize) { 3798 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3799 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3800 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3801 } 3802 CodeGenFunction CGF(CGM); 3803 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3804 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3805 3806 // *privi = &.privates.privi; 3807 LValue Base = CGF.EmitLoadOfPointerLValue( 3808 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3809 TaskPrivatesArg.getType()->castAs<PointerType>()); 3810 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3811 Counter = 0; 3812 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3813 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3814 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3815 LValue RefLVal = 3816 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3817 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3818 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3819 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3820 ++Counter; 3821 } 3822 CGF.FinishFunction(); 3823 return TaskPrivatesMap; 3824 } 3825 3826 /// Emit initialization for private variables in task-based directives. 
3827 static void emitPrivatesInit(CodeGenFunction &CGF, 3828 const OMPExecutableDirective &D, 3829 Address KmpTaskSharedsPtr, LValue TDBase, 3830 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3831 QualType SharedsTy, QualType SharedsPtrTy, 3832 const OMPTaskDataTy &Data, 3833 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3834 ASTContext &C = CGF.getContext(); 3835 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3836 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3837 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3838 ? OMPD_taskloop 3839 : OMPD_task; 3840 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3841 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3842 LValue SrcBase; 3843 bool IsTargetTask = 3844 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3845 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3846 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 3847 // PointersArray and SizesArray. The original variables for these arrays are 3848 // not captured and we get their addresses explicitly. 
3849 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3850 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3851 SrcBase = CGF.MakeAddrLValue( 3852 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3853 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3854 SharedsTy); 3855 } 3856 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3857 for (const PrivateDataTy &Pair : Privates) { 3858 const VarDecl *VD = Pair.second.PrivateCopy; 3859 const Expr *Init = VD->getAnyInitializer(); 3860 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3861 !CGF.isTrivialInitializer(Init)))) { 3862 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3863 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3864 const VarDecl *OriginalVD = Pair.second.Original; 3865 // Check if the variable is the target-based BasePointersArray, 3866 // PointersArray or SizesArray. 3867 LValue SharedRefLValue; 3868 QualType Type = PrivateLValue.getType(); 3869 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3870 if (IsTargetTask && !SharedField) { 3871 assert(isa<ImplicitParamDecl>(OriginalVD) && 3872 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3873 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3874 ->getNumParams() == 0 && 3875 isa<TranslationUnitDecl>( 3876 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3877 ->getDeclContext()) && 3878 "Expected artificial target data variable."); 3879 SharedRefLValue = 3880 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3881 } else if (ForDup) { 3882 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3883 SharedRefLValue = CGF.MakeAddrLValue( 3884 Address(SharedRefLValue.getPointer(CGF), 3885 C.getDeclAlign(OriginalVD)), 3886 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3887 SharedRefLValue.getTBAAInfo()); 3888 } else if (CGF.LambdaCaptureFields.count( 3889 Pair.second.Original->getCanonicalDecl()) > 0 || 3890 
dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3891 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3892 } else { 3893 // Processing for implicitly captured variables. 3894 InlinedOpenMPRegionRAII Region( 3895 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3896 /*HasCancel=*/false); 3897 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3898 } 3899 if (Type->isArrayType()) { 3900 // Initialize firstprivate array. 3901 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3902 // Perform simple memcpy. 3903 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3904 } else { 3905 // Initialize firstprivate array using element-by-element 3906 // initialization. 3907 CGF.EmitOMPAggregateAssign( 3908 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3909 Type, 3910 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3911 Address SrcElement) { 3912 // Clean up any temporaries needed by the initialization. 3913 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3914 InitScope.addPrivate( 3915 Elem, [SrcElement]() -> Address { return SrcElement; }); 3916 (void)InitScope.Privatize(); 3917 // Emit initialization for single element. 
3918 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3919 CGF, &CapturesInfo); 3920 CGF.EmitAnyExprToMem(Init, DestElement, 3921 Init->getType().getQualifiers(), 3922 /*IsInitializer=*/false); 3923 }); 3924 } 3925 } else { 3926 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3927 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3928 return SharedRefLValue.getAddress(CGF); 3929 }); 3930 (void)InitScope.Privatize(); 3931 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3932 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3933 /*capturedByInit=*/false); 3934 } 3935 } else { 3936 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3937 } 3938 } 3939 ++FI; 3940 } 3941 } 3942 3943 /// Check if duplication function is required for taskloops. 3944 static bool checkInitIsRequired(CodeGenFunction &CGF, 3945 ArrayRef<PrivateDataTy> Privates) { 3946 bool InitRequired = false; 3947 for (const PrivateDataTy &Pair : Privates) { 3948 const VarDecl *VD = Pair.second.PrivateCopy; 3949 const Expr *Init = VD->getAnyInitializer(); 3950 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3951 !CGF.isTrivialInitializer(Init)); 3952 if (InitRequired) 3953 break; 3954 } 3955 return InitRequired; 3956 } 3957 3958 3959 /// Emit task_dup function (for initialization of 3960 /// private/firstprivate/lastprivate vars and last_iter flag) 3961 /// \code 3962 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3963 /// lastpriv) { 3964 /// // setup lastprivate flag 3965 /// task_dst->last = lastpriv; 3966 /// // could be constructor calls here... 
3967 /// } 3968 /// \endcode 3969 static llvm::Value * 3970 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3971 const OMPExecutableDirective &D, 3972 QualType KmpTaskTWithPrivatesPtrQTy, 3973 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3974 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3975 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3976 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3977 ASTContext &C = CGM.getContext(); 3978 FunctionArgList Args; 3979 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3980 KmpTaskTWithPrivatesPtrQTy, 3981 ImplicitParamDecl::Other); 3982 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3983 KmpTaskTWithPrivatesPtrQTy, 3984 ImplicitParamDecl::Other); 3985 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3986 ImplicitParamDecl::Other); 3987 Args.push_back(&DstArg); 3988 Args.push_back(&SrcArg); 3989 Args.push_back(&LastprivArg); 3990 const auto &TaskDupFnInfo = 3991 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3992 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3993 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3994 auto *TaskDup = llvm::Function::Create( 3995 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3996 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3997 TaskDup->setDoesNotRecurse(); 3998 CodeGenFunction CGF(CGM); 3999 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4000 Loc); 4001 4002 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4003 CGF.GetAddrOfLocalVar(&DstArg), 4004 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4005 // task_dst->liter = lastpriv; 4006 if (WithLastIter) { 4007 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4008 LValue Base = CGF.EmitLValueForField( 4009 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4010 LValue LILVal 
= CGF.EmitLValueForField(Base, *LIFI); 4011 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4012 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4013 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4014 } 4015 4016 // Emit initial values for private copies (if any). 4017 assert(!Privates.empty()); 4018 Address KmpTaskSharedsPtr = Address::invalid(); 4019 if (!Data.FirstprivateVars.empty()) { 4020 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4021 CGF.GetAddrOfLocalVar(&SrcArg), 4022 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4023 LValue Base = CGF.EmitLValueForField( 4024 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4025 KmpTaskSharedsPtr = Address( 4026 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4027 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4028 KmpTaskTShareds)), 4029 Loc), 4030 CGM.getNaturalTypeAlignment(SharedsTy)); 4031 } 4032 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4033 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4034 CGF.FinishFunction(); 4035 return TaskDup; 4036 } 4037 4038 /// Checks if destructor function is required to be generated. 4039 /// \return true if cleanups are required, false otherwise. 4040 static bool 4041 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4042 bool NeedsCleanup = false; 4043 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4044 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4045 for (const FieldDecl *FD : PrivateRD->fields()) { 4046 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4047 if (NeedsCleanup) 4048 break; 4049 } 4050 return NeedsCleanup; 4051 } 4052 4053 namespace { 4054 /// Loop generator for OpenMP iterator expression. 
4055 class OMPIteratorGeneratorScope final 4056 : public CodeGenFunction::OMPPrivateScope { 4057 CodeGenFunction &CGF; 4058 const OMPIteratorExpr *E = nullptr; 4059 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 4060 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 4061 OMPIteratorGeneratorScope() = delete; 4062 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 4063 4064 public: 4065 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 4066 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 4067 if (!E) 4068 return; 4069 SmallVector<llvm::Value *, 4> Uppers; 4070 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4071 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 4072 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 4073 addPrivate(VD, [&CGF, VD]() { 4074 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 4075 }); 4076 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4077 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 4078 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 4079 "counter.addr"); 4080 }); 4081 } 4082 Privatize(); 4083 4084 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 4085 const OMPIteratorHelperData &HelperData = E->getHelper(I); 4086 LValue CLVal = 4087 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 4088 HelperData.CounterVD->getType()); 4089 // Counter = 0; 4090 CGF.EmitStoreOfScalar( 4091 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 4092 CLVal); 4093 CodeGenFunction::JumpDest &ContDest = 4094 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 4095 CodeGenFunction::JumpDest &ExitDest = 4096 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 4097 // N = <number-of_iterations>; 4098 llvm::Value *N = Uppers[I]; 4099 // cont: 4100 // if (Counter < N) goto body; else goto exit; 4101 CGF.EmitBlock(ContDest.getBlock()); 4102 auto *CVal = 4103 
CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 4104 llvm::Value *Cmp = 4105 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 4106 ? CGF.Builder.CreateICmpSLT(CVal, N) 4107 : CGF.Builder.CreateICmpULT(CVal, N); 4108 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 4109 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 4110 // body: 4111 CGF.EmitBlock(BodyBB); 4112 // Iteri = Begini + Counter * Stepi; 4113 CGF.EmitIgnoredExpr(HelperData.Update); 4114 } 4115 } 4116 ~OMPIteratorGeneratorScope() { 4117 if (!E) 4118 return; 4119 for (unsigned I = E->numOfIterators(); I > 0; --I) { 4120 // Counter = Counter + 1; 4121 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 4122 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 4123 // goto cont; 4124 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 4125 // exit: 4126 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 4127 } 4128 } 4129 }; 4130 } // namespace 4131 4132 static std::pair<llvm::Value *, llvm::Value *> 4133 getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { 4134 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 4135 llvm::Value *Addr; 4136 if (OASE) { 4137 const Expr *Base = OASE->getBase(); 4138 Addr = CGF.EmitScalarExpr(Base); 4139 } else { 4140 Addr = CGF.EmitLValue(E).getPointer(CGF); 4141 } 4142 llvm::Value *SizeVal; 4143 QualType Ty = E->getType(); 4144 if (OASE) { 4145 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 4146 for (const Expr *SE : OASE->getDimensions()) { 4147 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 4148 Sz = CGF.EmitScalarConversion( 4149 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); 4150 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); 4151 } 4152 } else if (const auto *ASE = 4153 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 4154 LValue UpAddrLVal = 4155 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 4156 llvm::Value *UpAddr = 
4157 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 4158 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); 4159 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); 4160 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 4161 } else { 4162 SizeVal = CGF.getTypeSize(Ty); 4163 } 4164 return std::make_pair(Addr, SizeVal); 4165 } 4166 4167 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4168 static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { 4169 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); 4170 if (KmpTaskAffinityInfoTy.isNull()) { 4171 RecordDecl *KmpAffinityInfoRD = 4172 C.buildImplicitRecord("kmp_task_affinity_info_t"); 4173 KmpAffinityInfoRD->startDefinition(); 4174 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); 4175 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); 4176 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); 4177 KmpAffinityInfoRD->completeDefinition(); 4178 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); 4179 } 4180 } 4181 4182 CGOpenMPRuntime::TaskResultTy 4183 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4184 const OMPExecutableDirective &D, 4185 llvm::Function *TaskFunction, QualType SharedsTy, 4186 Address Shareds, const OMPTaskDataTy &Data) { 4187 ASTContext &C = CGM.getContext(); 4188 llvm::SmallVector<PrivateDataTy, 4> Privates; 4189 // Aggregate privates and sort them by the alignment. 
4190 const auto *I = Data.PrivateCopies.begin(); 4191 for (const Expr *E : Data.PrivateVars) { 4192 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4193 Privates.emplace_back( 4194 C.getDeclAlign(VD), 4195 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4196 /*PrivateElemInit=*/nullptr)); 4197 ++I; 4198 } 4199 I = Data.FirstprivateCopies.begin(); 4200 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4201 for (const Expr *E : Data.FirstprivateVars) { 4202 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4203 Privates.emplace_back( 4204 C.getDeclAlign(VD), 4205 PrivateHelpersTy( 4206 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4207 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4208 ++I; 4209 ++IElemInitRef; 4210 } 4211 I = Data.LastprivateCopies.begin(); 4212 for (const Expr *E : Data.LastprivateVars) { 4213 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4214 Privates.emplace_back( 4215 C.getDeclAlign(VD), 4216 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4217 /*PrivateElemInit=*/nullptr)); 4218 ++I; 4219 } 4220 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 4221 return L.first > R.first; 4222 }); 4223 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4224 // Build type kmp_routine_entry_t (if not built yet). 4225 emitKmpRoutineEntryT(KmpInt32Ty); 4226 // Build type kmp_task_t (if not built yet). 
4227 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4228 if (SavedKmpTaskloopTQTy.isNull()) { 4229 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4230 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4231 } 4232 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4233 } else { 4234 assert((D.getDirectiveKind() == OMPD_task || 4235 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4236 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4237 "Expected taskloop, task or target directive"); 4238 if (SavedKmpTaskTQTy.isNull()) { 4239 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4240 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4241 } 4242 KmpTaskTQTy = SavedKmpTaskTQTy; 4243 } 4244 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4245 // Build particular struct kmp_task_t for the given task. 4246 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 4247 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 4248 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 4249 QualType KmpTaskTWithPrivatesPtrQTy = 4250 C.getPointerType(KmpTaskTWithPrivatesQTy); 4251 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 4252 llvm::Type *KmpTaskTWithPrivatesPtrTy = 4253 KmpTaskTWithPrivatesTy->getPointerTo(); 4254 llvm::Value *KmpTaskTWithPrivatesTySize = 4255 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 4256 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 4257 4258 // Emit initial values for private copies (if any). 
4259 llvm::Value *TaskPrivatesMap = nullptr; 4260 llvm::Type *TaskPrivatesMapTy = 4261 std::next(TaskFunction->arg_begin(), 3)->getType(); 4262 if (!Privates.empty()) { 4263 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4264 TaskPrivatesMap = emitTaskPrivateMappingFunction( 4265 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 4266 FI->getType(), Privates); 4267 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4268 TaskPrivatesMap, TaskPrivatesMapTy); 4269 } else { 4270 TaskPrivatesMap = llvm::ConstantPointerNull::get( 4271 cast<llvm::PointerType>(TaskPrivatesMapTy)); 4272 } 4273 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 4274 // kmp_task_t *tt); 4275 llvm::Function *TaskEntry = emitProxyTaskFunction( 4276 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4277 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 4278 TaskPrivatesMap); 4279 4280 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 4281 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 4282 // kmp_routine_entry_t *task_entry); 4283 // Task flags. Format is taken from 4284 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 4285 // description of kmp_tasking_flags struct. 4286 enum { 4287 TiedFlag = 0x1, 4288 FinalFlag = 0x2, 4289 DestructorsFlag = 0x8, 4290 PriorityFlag = 0x20, 4291 DetachableFlag = 0x40, 4292 }; 4293 unsigned Flags = Data.Tied ? TiedFlag : 0; 4294 bool NeedsCleanup = false; 4295 if (!Privates.empty()) { 4296 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 4297 if (NeedsCleanup) 4298 Flags = Flags | DestructorsFlag; 4299 } 4300 if (Data.Priority.getInt()) 4301 Flags = Flags | PriorityFlag; 4302 if (D.hasClausesOfKind<OMPDetachClause>()) 4303 Flags = Flags | DetachableFlag; 4304 llvm::Value *TaskFlags = 4305 Data.Final.getPointer() 4306 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 4307 CGF.Builder.getInt32(FinalFlag), 4308 CGF.Builder.getInt32(/*C=*/0)) 4309 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 4310 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 4311 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 4312 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 4313 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 4314 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4315 TaskEntry, KmpRoutineEntryPtrTy)}; 4316 llvm::Value *NewTask; 4317 if (D.hasClausesOfKind<OMPNowaitClause>()) { 4318 // Check if we have any device clause associated with the directive. 4319 const Expr *Device = nullptr; 4320 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 4321 Device = C->getDevice(); 4322 // Emit device ID if any otherwise use default value. 4323 llvm::Value *DeviceID; 4324 if (Device) 4325 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 4326 CGF.Int64Ty, /*isSigned=*/true); 4327 else 4328 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 4329 AllocArgs.push_back(DeviceID); 4330 NewTask = CGF.EmitRuntimeCall( 4331 OMPBuilder.getOrCreateRuntimeFunction( 4332 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), 4333 AllocArgs); 4334 } else { 4335 NewTask = 4336 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4337 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), 4338 AllocArgs); 4339 } 4340 // Emit detach clause initialization. 
4341 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 4342 // task_descriptor); 4343 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 4344 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 4345 LValue EvtLVal = CGF.EmitLValue(Evt); 4346 4347 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 4348 // int gtid, kmp_task_t *task); 4349 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 4350 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 4351 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 4352 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 4353 OMPBuilder.getOrCreateRuntimeFunction( 4354 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), 4355 {Loc, Tid, NewTask}); 4356 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 4357 Evt->getExprLoc()); 4358 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 4359 } 4360 // Process affinity clauses. 4361 if (D.hasClausesOfKind<OMPAffinityClause>()) { 4362 // Process list of affinity data. 4363 ASTContext &C = CGM.getContext(); 4364 Address AffinitiesArray = Address::invalid(); 4365 // Calculate number of elements to form the array of affinity data. 4366 llvm::Value *NumOfElements = nullptr; 4367 unsigned NumAffinities = 0; 4368 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4369 if (const Expr *Modifier = C->getModifier()) { 4370 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); 4371 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4372 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4373 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4374 NumOfElements = 4375 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; 4376 } 4377 } else { 4378 NumAffinities += C->varlist_size(); 4379 } 4380 } 4381 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); 4382 // Fields ids in kmp_task_affinity_info record. 
4383 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; 4384 4385 QualType KmpTaskAffinityInfoArrayTy; 4386 if (NumOfElements) { 4387 NumOfElements = CGF.Builder.CreateNUWAdd( 4388 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); 4389 OpaqueValueExpr OVE( 4390 Loc, 4391 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), 4392 VK_RValue); 4393 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4394 RValue::get(NumOfElements)); 4395 KmpTaskAffinityInfoArrayTy = 4396 C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, 4397 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4398 // Properly emit variable-sized array. 4399 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, 4400 ImplicitParamDecl::Other); 4401 CGF.EmitVarDecl(*PD); 4402 AffinitiesArray = CGF.GetAddrOfLocalVar(PD); 4403 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4404 /*isSigned=*/false); 4405 } else { 4406 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( 4407 KmpTaskAffinityInfoTy, 4408 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, 4409 ArrayType::Normal, /*IndexTypeQuals=*/0); 4410 AffinitiesArray = 4411 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); 4412 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); 4413 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, 4414 /*isSigned=*/false); 4415 } 4416 4417 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); 4418 // Fill array by elements without iterators. 
4419 unsigned Pos = 0; 4420 bool HasIterator = false; 4421 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4422 if (C->getModifier()) { 4423 HasIterator = true; 4424 continue; 4425 } 4426 for (const Expr *E : C->varlists()) { 4427 llvm::Value *Addr; 4428 llvm::Value *Size; 4429 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4430 LValue Base = 4431 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), 4432 KmpTaskAffinityInfoTy); 4433 // affs[i].base_addr = &<Affinities[i].second>; 4434 LValue BaseAddrLVal = CGF.EmitLValueForField( 4435 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4436 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4437 BaseAddrLVal); 4438 // affs[i].len = sizeof(<Affinities[i].second>); 4439 LValue LenLVal = CGF.EmitLValueForField( 4440 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4441 CGF.EmitStoreOfScalar(Size, LenLVal); 4442 ++Pos; 4443 } 4444 } 4445 LValue PosLVal; 4446 if (HasIterator) { 4447 PosLVal = CGF.MakeAddrLValue( 4448 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), 4449 C.getSizeType()); 4450 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4451 } 4452 // Process elements with iterators. 
4453 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { 4454 const Expr *Modifier = C->getModifier(); 4455 if (!Modifier) 4456 continue; 4457 OMPIteratorGeneratorScope IteratorScope( 4458 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); 4459 for (const Expr *E : C->varlists()) { 4460 llvm::Value *Addr; 4461 llvm::Value *Size; 4462 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4463 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4464 LValue Base = CGF.MakeAddrLValue( 4465 Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), 4466 AffinitiesArray.getAlignment()), 4467 KmpTaskAffinityInfoTy); 4468 // affs[i].base_addr = &<Affinities[i].second>; 4469 LValue BaseAddrLVal = CGF.EmitLValueForField( 4470 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); 4471 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4472 BaseAddrLVal); 4473 // affs[i].len = sizeof(<Affinities[i].second>); 4474 LValue LenLVal = CGF.EmitLValueForField( 4475 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); 4476 CGF.EmitStoreOfScalar(Size, LenLVal); 4477 Idx = CGF.Builder.CreateNUWAdd( 4478 Idx, llvm::ConstantInt::get(Idx->getType(), 1)); 4479 CGF.EmitStoreOfScalar(Idx, PosLVal); 4480 } 4481 } 4482 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, 4483 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 4484 // naffins, kmp_task_affinity_info_t *affin_list); 4485 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); 4486 llvm::Value *GTid = getThreadID(CGF, Loc); 4487 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4488 AffinitiesArray.getPointer(), CGM.VoidPtrTy); 4489 // FIXME: Emit the function and ignore its result for now unless the 4490 // runtime function is properly implemented. 
4491 (void)CGF.EmitRuntimeCall( 4492 OMPBuilder.getOrCreateRuntimeFunction( 4493 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), 4494 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); 4495 } 4496 llvm::Value *NewTaskNewTaskTTy = 4497 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4498 NewTask, KmpTaskTWithPrivatesPtrTy); 4499 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 4500 KmpTaskTWithPrivatesQTy); 4501 LValue TDBase = 4502 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4503 // Fill the data in the resulting kmp_task_t record. 4504 // Copy shareds if there are any. 4505 Address KmpTaskSharedsPtr = Address::invalid(); 4506 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 4507 KmpTaskSharedsPtr = 4508 Address(CGF.EmitLoadOfScalar( 4509 CGF.EmitLValueForField( 4510 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 4511 KmpTaskTShareds)), 4512 Loc), 4513 CGM.getNaturalTypeAlignment(SharedsTy)); 4514 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 4515 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 4516 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 4517 } 4518 // Emit initial values for private copies (if any). 4519 TaskResultTy Result; 4520 if (!Privates.empty()) { 4521 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 4522 SharedsTy, SharedsPtrTy, Data, Privates, 4523 /*ForDup=*/false); 4524 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 4525 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 4526 Result.TaskDupFn = emitTaskDupFunction( 4527 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 4528 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 4529 /*WithLastIter=*/!Data.LastprivateVars.empty()); 4530 } 4531 } 4532 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 
4533 enum { Priority = 0, Destructors = 1 }; 4534 // Provide pointer to function with destructors for privates. 4535 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 4536 const RecordDecl *KmpCmplrdataUD = 4537 (*FI)->getType()->getAsUnionType()->getDecl(); 4538 if (NeedsCleanup) { 4539 llvm::Value *DestructorFn = emitDestructorsFunction( 4540 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 4541 KmpTaskTWithPrivatesQTy); 4542 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 4543 LValue DestructorsLV = CGF.EmitLValueForField( 4544 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 4545 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4546 DestructorFn, KmpRoutineEntryPtrTy), 4547 DestructorsLV); 4548 } 4549 // Set priority. 4550 if (Data.Priority.getInt()) { 4551 LValue Data2LV = CGF.EmitLValueForField( 4552 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 4553 LValue PriorityLV = CGF.EmitLValueForField( 4554 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 4555 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 4556 } 4557 Result.NewTask = NewTask; 4558 Result.TaskEntry = TaskEntry; 4559 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 4560 Result.TDBase = TDBase; 4561 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 4562 return Result; 4563 } 4564 4565 namespace { 4566 /// Dependence kind for RTL. 4567 enum RTLDependenceKindTy { 4568 DepIn = 0x01, 4569 DepInOut = 0x3, 4570 DepMutexInOutSet = 0x4 4571 }; 4572 /// Fields ids in kmp_depend_info record. 4573 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 4574 } // namespace 4575 4576 /// Translates internal dependency kind into the runtime kind. 4577 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 4578 RTLDependenceKindTy DepKind; 4579 switch (K) { 4580 case OMPC_DEPEND_in: 4581 DepKind = DepIn; 4582 break; 4583 // Out and InOut dependencies must use the same code. 
4584 case OMPC_DEPEND_out: 4585 case OMPC_DEPEND_inout: 4586 DepKind = DepInOut; 4587 break; 4588 case OMPC_DEPEND_mutexinoutset: 4589 DepKind = DepMutexInOutSet; 4590 break; 4591 case OMPC_DEPEND_source: 4592 case OMPC_DEPEND_sink: 4593 case OMPC_DEPEND_depobj: 4594 case OMPC_DEPEND_unknown: 4595 llvm_unreachable("Unknown task dependence type"); 4596 } 4597 return DepKind; 4598 } 4599 4600 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4601 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4602 QualType &FlagsTy) { 4603 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4604 if (KmpDependInfoTy.isNull()) { 4605 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4606 KmpDependInfoRD->startDefinition(); 4607 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4608 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4609 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4610 KmpDependInfoRD->completeDefinition(); 4611 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4612 } 4613 } 4614 4615 std::pair<llvm::Value *, LValue> 4616 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4617 SourceLocation Loc) { 4618 ASTContext &C = CGM.getContext(); 4619 QualType FlagsTy; 4620 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4621 RecordDecl *KmpDependInfoRD = 4622 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4623 LValue Base = CGF.EmitLoadOfPointerLValue( 4624 DepobjLVal.getAddress(CGF), 4625 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4626 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4627 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4628 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4629 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4630 Base.getTBAAInfo()); 4631 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4632 Addr.getPointer(), 4633 
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4634 LValue NumDepsBase = CGF.MakeAddrLValue( 4635 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4636 Base.getBaseInfo(), Base.getTBAAInfo()); 4637 // NumDeps = deps[i].base_addr; 4638 LValue BaseAddrLVal = CGF.EmitLValueForField( 4639 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4640 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4641 return std::make_pair(NumDeps, Base); 4642 } 4643 4644 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4645 llvm::PointerUnion<unsigned *, LValue *> Pos, 4646 const OMPTaskDataTy::DependData &Data, 4647 Address DependenciesArray) { 4648 CodeGenModule &CGM = CGF.CGM; 4649 ASTContext &C = CGM.getContext(); 4650 QualType FlagsTy; 4651 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4652 RecordDecl *KmpDependInfoRD = 4653 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4654 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4655 4656 OMPIteratorGeneratorScope IteratorScope( 4657 CGF, cast_or_null<OMPIteratorExpr>( 4658 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4659 : nullptr)); 4660 for (const Expr *E : Data.DepExprs) { 4661 llvm::Value *Addr; 4662 llvm::Value *Size; 4663 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4664 LValue Base; 4665 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4666 Base = CGF.MakeAddrLValue( 4667 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4668 } else { 4669 LValue &PosLVal = *Pos.get<LValue *>(); 4670 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4671 Base = CGF.MakeAddrLValue( 4672 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), 4673 DependenciesArray.getAlignment()), 4674 KmpDependInfoTy); 4675 } 4676 // deps[i].base_addr = &<Dependencies[i].second>; 4677 LValue BaseAddrLVal = CGF.EmitLValueForField( 4678 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4679 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4680 BaseAddrLVal); 4681 // deps[i].len = sizeof(<Dependencies[i].second>); 4682 LValue LenLVal = CGF.EmitLValueForField( 4683 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4684 CGF.EmitStoreOfScalar(Size, LenLVal); 4685 // deps[i].flags = <Dependencies[i].first>; 4686 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4687 LValue FlagsLVal = CGF.EmitLValueForField( 4688 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4689 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4690 FlagsLVal); 4691 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4692 ++(*P); 4693 } else { 4694 LValue &PosLVal = *Pos.get<LValue *>(); 4695 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4696 Idx = CGF.Builder.CreateNUWAdd(Idx, 4697 llvm::ConstantInt::get(Idx->getType(), 1)); 4698 CGF.EmitStoreOfScalar(Idx, PosLVal); 4699 } 4700 } 4701 } 4702 4703 static SmallVector<llvm::Value *, 4> 4704 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4705 const OMPTaskDataTy::DependData 
&Data) { 4706 assert(Data.DepKind == OMPC_DEPEND_depobj && 4707 "Expected depobj dependecy kind."); 4708 SmallVector<llvm::Value *, 4> Sizes; 4709 SmallVector<LValue, 4> SizeLVals; 4710 ASTContext &C = CGF.getContext(); 4711 QualType FlagsTy; 4712 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4713 RecordDecl *KmpDependInfoRD = 4714 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4715 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4716 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4717 { 4718 OMPIteratorGeneratorScope IteratorScope( 4719 CGF, cast_or_null<OMPIteratorExpr>( 4720 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4721 : nullptr)); 4722 for (const Expr *E : Data.DepExprs) { 4723 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4724 LValue Base = CGF.EmitLoadOfPointerLValue( 4725 DepobjLVal.getAddress(CGF), 4726 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4727 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4728 Base.getAddress(CGF), KmpDependInfoPtrT); 4729 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4730 Base.getTBAAInfo()); 4731 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4732 Addr.getPointer(), 4733 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4734 LValue NumDepsBase = CGF.MakeAddrLValue( 4735 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4736 Base.getBaseInfo(), Base.getTBAAInfo()); 4737 // NumDeps = deps[i].base_addr; 4738 LValue BaseAddrLVal = CGF.EmitLValueForField( 4739 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4740 llvm::Value *NumDeps = 4741 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4742 LValue NumLVal = CGF.MakeAddrLValue( 4743 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4744 C.getUIntPtrType()); 4745 CGF.InitTempAlloca(NumLVal.getAddress(CGF), 4746 llvm::ConstantInt::get(CGF.IntPtrTy, 0)); 4747 llvm::Value *PrevVal = 
CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4748 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4749 CGF.EmitStoreOfScalar(Add, NumLVal); 4750 SizeLVals.push_back(NumLVal); 4751 } 4752 } 4753 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4754 llvm::Value *Size = 4755 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4756 Sizes.push_back(Size); 4757 } 4758 return Sizes; 4759 } 4760 4761 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4762 LValue PosLVal, 4763 const OMPTaskDataTy::DependData &Data, 4764 Address DependenciesArray) { 4765 assert(Data.DepKind == OMPC_DEPEND_depobj && 4766 "Expected depobj dependecy kind."); 4767 ASTContext &C = CGF.getContext(); 4768 QualType FlagsTy; 4769 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4770 RecordDecl *KmpDependInfoRD = 4771 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4772 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4773 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4774 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4775 { 4776 OMPIteratorGeneratorScope IteratorScope( 4777 CGF, cast_or_null<OMPIteratorExpr>( 4778 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4779 : nullptr)); 4780 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4781 const Expr *E = Data.DepExprs[I]; 4782 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4783 LValue Base = CGF.EmitLoadOfPointerLValue( 4784 DepobjLVal.getAddress(CGF), 4785 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4786 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4787 Base.getAddress(CGF), KmpDependInfoPtrT); 4788 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4789 Base.getTBAAInfo()); 4790 4791 // Get number of elements in a single depobj. 
4792 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4793 Addr.getPointer(), 4794 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4795 LValue NumDepsBase = CGF.MakeAddrLValue( 4796 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4797 Base.getBaseInfo(), Base.getTBAAInfo()); 4798 // NumDeps = deps[i].base_addr; 4799 LValue BaseAddrLVal = CGF.EmitLValueForField( 4800 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4801 llvm::Value *NumDeps = 4802 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4803 4804 // memcopy dependency data. 4805 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4806 ElSize, 4807 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4808 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4809 Address DepAddr = 4810 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 4811 DependenciesArray.getAlignment()); 4812 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4813 4814 // Increase pos. 4815 // pos += size; 4816 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4817 CGF.EmitStoreOfScalar(Add, PosLVal); 4818 } 4819 } 4820 } 4821 4822 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4823 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4824 SourceLocation Loc) { 4825 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4826 return D.DepExprs.empty(); 4827 })) 4828 return std::make_pair(nullptr, Address::invalid()); 4829 // Process list of dependencies. 4830 ASTContext &C = CGM.getContext(); 4831 Address DependenciesArray = Address::invalid(); 4832 llvm::Value *NumOfElements = nullptr; 4833 unsigned NumDependencies = std::accumulate( 4834 Dependencies.begin(), Dependencies.end(), 0, 4835 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4836 return D.DepKind == OMPC_DEPEND_depobj 4837 ? V 4838 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4839 }); 4840 QualType FlagsTy; 4841 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4842 bool HasDepobjDeps = false; 4843 bool HasRegularWithIterators = false; 4844 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4845 llvm::Value *NumOfRegularWithIterators = 4846 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4847 // Calculate number of depobj dependecies and regular deps with the iterators. 4848 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4849 if (D.DepKind == OMPC_DEPEND_depobj) { 4850 SmallVector<llvm::Value *, 4> Sizes = 4851 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4852 for (llvm::Value *Size : Sizes) { 4853 NumOfDepobjElements = 4854 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4855 } 4856 HasDepobjDeps = true; 4857 continue; 4858 } 4859 // Include number of iterations, if any. 4860 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4861 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4862 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4863 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4864 NumOfRegularWithIterators = 4865 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4866 } 4867 HasRegularWithIterators = true; 4868 continue; 4869 } 4870 } 4871 4872 QualType KmpDependInfoArrayTy; 4873 if (HasDepobjDeps || HasRegularWithIterators) { 4874 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4875 /*isSigned=*/false); 4876 if (HasDepobjDeps) { 4877 NumOfElements = 4878 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4879 } 4880 if (HasRegularWithIterators) { 4881 NumOfElements = 4882 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4883 } 4884 OpaqueValueExpr OVE(Loc, 4885 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4886 VK_RValue); 4887 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4888 RValue::get(NumOfElements)); 4889 
KmpDependInfoArrayTy = 4890 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4891 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4892 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4893 // Properly emit variable-sized array. 4894 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4895 ImplicitParamDecl::Other); 4896 CGF.EmitVarDecl(*PD); 4897 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4898 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4899 /*isSigned=*/false); 4900 } else { 4901 KmpDependInfoArrayTy = C.getConstantArrayType( 4902 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4903 ArrayType::Normal, /*IndexTypeQuals=*/0); 4904 DependenciesArray = 4905 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4906 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4907 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4908 /*isSigned=*/false); 4909 } 4910 unsigned Pos = 0; 4911 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4912 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4913 Dependencies[I].IteratorExpr) 4914 continue; 4915 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4916 DependenciesArray); 4917 } 4918 // Copy regular dependecies with iterators. 4919 LValue PosLVal = CGF.MakeAddrLValue( 4920 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4921 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4922 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4923 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4924 !Dependencies[I].IteratorExpr) 4925 continue; 4926 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4927 DependenciesArray); 4928 } 4929 // Copy final depobj arrays without iterators. 
4930 if (HasDepobjDeps) { 4931 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4932 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4933 continue; 4934 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4935 DependenciesArray); 4936 } 4937 } 4938 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4939 DependenciesArray, CGF.VoidPtrTy); 4940 return std::make_pair(NumOfElements, DependenciesArray); 4941 } 4942 4943 Address CGOpenMPRuntime::emitDepobjDependClause( 4944 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4945 SourceLocation Loc) { 4946 if (Dependencies.DepExprs.empty()) 4947 return Address::invalid(); 4948 // Process list of dependencies. 4949 ASTContext &C = CGM.getContext(); 4950 Address DependenciesArray = Address::invalid(); 4951 unsigned NumDependencies = Dependencies.DepExprs.size(); 4952 QualType FlagsTy; 4953 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4954 RecordDecl *KmpDependInfoRD = 4955 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4956 4957 llvm::Value *Size; 4958 // Define type kmp_depend_info[<Dependencies.size()>]; 4959 // For depobj reserve one extra element to store the number of elements. 4960 // It is required to handle depobj(x) update(in) construct. 
4961 // kmp_depend_info[<Dependencies.size()>] deps; 4962 llvm::Value *NumDepsVal; 4963 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4964 if (const auto *IE = 4965 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4966 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4967 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4968 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4969 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4970 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4971 } 4972 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4973 NumDepsVal); 4974 CharUnits SizeInBytes = 4975 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4976 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4977 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4978 NumDepsVal = 4979 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4980 } else { 4981 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4982 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4983 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4984 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4985 Size = CGM.getSize(Sz.alignTo(Align)); 4986 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4987 } 4988 // Need to allocate on the dynamic memory. 4989 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4990 // Use default allocator. 
4991 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4992 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4993 4994 llvm::Value *Addr = 4995 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4996 CGM.getModule(), OMPRTL___kmpc_alloc), 4997 Args, ".dep.arr.addr"); 4998 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4999 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5000 DependenciesArray = Address(Addr, Align); 5001 // Write number of elements in the first element of array for depobj. 5002 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5003 // deps[i].base_addr = NumDependencies; 5004 LValue BaseAddrLVal = CGF.EmitLValueForField( 5005 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5006 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5007 llvm::PointerUnion<unsigned *, LValue *> Pos; 5008 unsigned Idx = 1; 5009 LValue PosLVal; 5010 if (Dependencies.IteratorExpr) { 5011 PosLVal = CGF.MakeAddrLValue( 5012 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5013 C.getSizeType()); 5014 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5015 /*IsInit=*/true); 5016 Pos = &PosLVal; 5017 } else { 5018 Pos = &Idx; 5019 } 5020 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5021 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5022 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5023 return DependenciesArray; 5024 } 5025 5026 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5027 SourceLocation Loc) { 5028 ASTContext &C = CGM.getContext(); 5029 QualType FlagsTy; 5030 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5031 LValue Base = CGF.EmitLoadOfPointerLValue( 5032 DepobjLVal.getAddress(CGF), 5033 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5034 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5035 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5036 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5037 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5038 Addr.getPointer(), 5039 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5040 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5041 CGF.VoidPtrTy); 5042 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5043 // Use default allocator. 5044 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5045 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5046 5047 // _kmpc_free(gtid, addr, nullptr); 5048 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5049 CGM.getModule(), OMPRTL___kmpc_free), 5050 Args); 5051 } 5052 5053 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5054 OpenMPDependClauseKind NewDepKind, 5055 SourceLocation Loc) { 5056 ASTContext &C = CGM.getContext(); 5057 QualType FlagsTy; 5058 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5059 RecordDecl *KmpDependInfoRD = 5060 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5061 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5062 llvm::Value *NumDeps; 5063 LValue Base; 5064 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5065 5066 Address Begin = Base.getAddress(CGF); 5067 // Cast from pointer to array type to pointer to single element. 5068 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5069 // The basic structure here is a while-do loop. 
5070 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5071 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5072 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5073 CGF.EmitBlock(BodyBB); 5074 llvm::PHINode *ElementPHI = 5075 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5076 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5077 Begin = Address(ElementPHI, Begin.getAlignment()); 5078 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5079 Base.getTBAAInfo()); 5080 // deps[i].flags = NewDepKind; 5081 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5082 LValue FlagsLVal = CGF.EmitLValueForField( 5083 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5084 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5085 FlagsLVal); 5086 5087 // Shift the address forward by one element. 5088 Address ElementNext = 5089 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5090 ElementPHI->addIncoming(ElementNext.getPointer(), 5091 CGF.Builder.GetInsertBlock()); 5092 llvm::Value *IsEmpty = 5093 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5094 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5095 // Done. 
5096 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5097 } 5098 5099 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5100 const OMPExecutableDirective &D, 5101 llvm::Function *TaskFunction, 5102 QualType SharedsTy, Address Shareds, 5103 const Expr *IfCond, 5104 const OMPTaskDataTy &Data) { 5105 if (!CGF.HaveInsertPoint()) 5106 return; 5107 5108 TaskResultTy Result = 5109 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5110 llvm::Value *NewTask = Result.NewTask; 5111 llvm::Function *TaskEntry = Result.TaskEntry; 5112 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5113 LValue TDBase = Result.TDBase; 5114 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5115 // Process list of dependences. 5116 Address DependenciesArray = Address::invalid(); 5117 llvm::Value *NumOfElements; 5118 std::tie(NumOfElements, DependenciesArray) = 5119 emitDependClause(CGF, Data.Dependences, Loc); 5120 5121 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5122 // libcall. 
5123 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5124 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5125 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5126 // list is not empty 5127 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5128 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5129 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5130 llvm::Value *DepTaskArgs[7]; 5131 if (!Data.Dependences.empty()) { 5132 DepTaskArgs[0] = UpLoc; 5133 DepTaskArgs[1] = ThreadID; 5134 DepTaskArgs[2] = NewTask; 5135 DepTaskArgs[3] = NumOfElements; 5136 DepTaskArgs[4] = DependenciesArray.getPointer(); 5137 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5138 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5139 } 5140 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5141 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5142 if (!Data.Tied) { 5143 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5144 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5145 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5146 } 5147 if (!Data.Dependences.empty()) { 5148 CGF.EmitRuntimeCall( 5149 OMPBuilder.getOrCreateRuntimeFunction( 5150 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5151 DepTaskArgs); 5152 } else { 5153 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5154 CGM.getModule(), OMPRTL___kmpc_omp_task), 5155 TaskArgs); 5156 } 5157 // Check if parent region is untied and build return for untied task; 5158 if (auto *Region = 5159 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5160 Region->emitUntiedSwitch(CGF); 5161 }; 5162 5163 llvm::Value *DepWaitTaskArgs[6]; 5164 if (!Data.Dependences.empty()) { 5165 DepWaitTaskArgs[0] = UpLoc; 5166 DepWaitTaskArgs[1] = ThreadID; 5167 DepWaitTaskArgs[2] = NumOfElements; 5168 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5169 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5170 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5171 } 5172 auto &M = CGM.getModule(); 5173 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5174 TaskEntry, &Data, &DepWaitTaskArgs, 5175 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5176 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5177 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5178 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5179 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5180 // is specified. 5181 if (!Data.Dependences.empty()) 5182 CGF.EmitRuntimeCall( 5183 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5184 DepWaitTaskArgs); 5185 // Call proxy_task_entry(gtid, new_task); 5186 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5187 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5188 Action.Enter(CGF); 5189 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5190 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5191 OutlinedFnArgs); 5192 }; 5193 5194 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5195 // kmp_task_t *new_task); 5196 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5197 // kmp_task_t *new_task); 5198 RegionCodeGenTy RCG(CodeGen); 5199 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5200 M, OMPRTL___kmpc_omp_task_begin_if0), 5201 TaskArgs, 5202 OMPBuilder.getOrCreateRuntimeFunction( 5203 M, OMPRTL___kmpc_omp_task_complete_if0), 5204 TaskArgs); 5205 RCG.setAction(Action); 5206 RCG(CGF); 5207 }; 5208 5209 if (IfCond) { 5210 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5211 } else { 5212 RegionCodeGenTy ThenRCG(ThenCodeGen); 5213 ThenRCG(CGF); 5214 } 5215 } 5216 5217 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5218 const OMPLoopDirective &D, 5219 llvm::Function *TaskFunction, 5220 
QualType SharedsTy, Address Shareds, 5221 const Expr *IfCond, 5222 const OMPTaskDataTy &Data) { 5223 if (!CGF.HaveInsertPoint()) 5224 return; 5225 TaskResultTy Result = 5226 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5227 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5228 // libcall. 5229 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5230 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5231 // sched, kmp_uint64 grainsize, void *task_dup); 5232 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5233 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5234 llvm::Value *IfVal; 5235 if (IfCond) { 5236 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5237 /*isSigned=*/true); 5238 } else { 5239 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5240 } 5241 5242 LValue LBLVal = CGF.EmitLValueForField( 5243 Result.TDBase, 5244 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5245 const auto *LBVar = 5246 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5247 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5248 LBLVal.getQuals(), 5249 /*IsInitializer=*/true); 5250 LValue UBLVal = CGF.EmitLValueForField( 5251 Result.TDBase, 5252 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5253 const auto *UBVar = 5254 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5255 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5256 UBLVal.getQuals(), 5257 /*IsInitializer=*/true); 5258 LValue StLVal = CGF.EmitLValueForField( 5259 Result.TDBase, 5260 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5261 const auto *StVar = 5262 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5263 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5264 StLVal.getQuals(), 5265 /*IsInitializer=*/true); 5266 // 
Store reductions address. 5267 LValue RedLVal = CGF.EmitLValueForField( 5268 Result.TDBase, 5269 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5270 if (Data.Reductions) { 5271 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5272 } else { 5273 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5274 CGF.getContext().VoidPtrTy); 5275 } 5276 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5277 llvm::Value *TaskArgs[] = { 5278 UpLoc, 5279 ThreadID, 5280 Result.NewTask, 5281 IfVal, 5282 LBLVal.getPointer(CGF), 5283 UBLVal.getPointer(CGF), 5284 CGF.EmitLoadOfScalar(StLVal, Loc), 5285 llvm::ConstantInt::getSigned( 5286 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5287 llvm::ConstantInt::getSigned( 5288 CGF.IntTy, Data.Schedule.getPointer() 5289 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5290 : NoSchedule), 5291 Data.Schedule.getPointer() 5292 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5293 /*isSigned=*/false) 5294 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5295 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5296 Result.TaskDupFn, CGF.VoidPtrTy) 5297 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5298 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5299 CGM.getModule(), OMPRTL___kmpc_taskloop), 5300 TaskArgs); 5301 } 5302 5303 /// Emit reduction operation for each element of array (required for 5304 /// array sections) LHS op = RHS. 5305 /// \param Type Type of array. 5306 /// \param LHSVar Variable on the left side of the reduction operation 5307 /// (references element of array in original variable). 5308 /// \param RHSVar Variable on the right side of the reduction operation 5309 /// (references element of array in original variable). 5310 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5311 /// RHSVar. 
5312 static void EmitOMPAggregateReduction( 5313 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5314 const VarDecl *RHSVar, 5315 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5316 const Expr *, const Expr *)> &RedOpGen, 5317 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5318 const Expr *UpExpr = nullptr) { 5319 // Perform element-by-element initialization. 5320 QualType ElementTy; 5321 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5322 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5323 5324 // Drill down to the base element type on both arrays. 5325 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5326 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5327 5328 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5329 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5330 // Cast from pointer to array type to pointer to single element. 5331 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5332 // The basic structure here is a while-do loop. 5333 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5334 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5335 llvm::Value *IsEmpty = 5336 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5337 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5338 5339 // Enter the loop body, making that address the current address. 
5340 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5341 CGF.EmitBlock(BodyBB); 5342 5343 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5344 5345 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5346 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5347 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5348 Address RHSElementCurrent = 5349 Address(RHSElementPHI, 5350 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5351 5352 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5353 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5354 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5355 Address LHSElementCurrent = 5356 Address(LHSElementPHI, 5357 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5358 5359 // Emit copy. 5360 CodeGenFunction::OMPPrivateScope Scope(CGF); 5361 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5362 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5363 Scope.Privatize(); 5364 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5365 Scope.ForceCleanup(); 5366 5367 // Shift the address forward by one element. 5368 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5369 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5370 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5371 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5372 // Check whether we've reached the end. 5373 llvm::Value *Done = 5374 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5375 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5376 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5377 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5378 5379 // Done. 5380 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5381 } 5382 5383 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5384 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5385 /// UDR combiner function. 5386 static void emitReductionCombiner(CodeGenFunction &CGF, 5387 const Expr *ReductionOp) { 5388 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5389 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5390 if (const auto *DRE = 5391 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5392 if (const auto *DRD = 5393 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5394 std::pair<llvm::Function *, llvm::Function *> Reduction = 5395 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5396 RValue Func = RValue::get(Reduction.first); 5397 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5398 CGF.EmitIgnoredExpr(ReductionOp); 5399 return; 5400 } 5401 CGF.EmitIgnoredExpr(ReductionOp); 5402 } 5403 5404 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5405 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5406 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5407 ArrayRef<const Expr *> ReductionOps) { 5408 ASTContext &C = CGM.getContext(); 5409 5410 // void reduction_func(void *LHSArg, void *RHSArg); 5411 FunctionArgList Args; 5412 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5413 ImplicitParamDecl::Other); 5414 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5415 ImplicitParamDecl::Other); 5416 Args.push_back(&LHSArg); 5417 Args.push_back(&RHSArg); 5418 const auto &CGFI = 5419 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5420 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5421 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5422 llvm::GlobalValue::InternalLinkage, Name, 5423 &CGM.getModule()); 5424 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5425 Fn->setDoesNotRecurse(); 
5426 CodeGenFunction CGF(CGM); 5427 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5428 5429 // Dst = (void*[n])(LHSArg); 5430 // Src = (void*[n])(RHSArg); 5431 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5432 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5433 ArgsType), CGF.getPointerAlign()); 5434 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5435 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5436 ArgsType), CGF.getPointerAlign()); 5437 5438 // ... 5439 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5440 // ... 5441 CodeGenFunction::OMPPrivateScope Scope(CGF); 5442 auto IPriv = Privates.begin(); 5443 unsigned Idx = 0; 5444 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5445 const auto *RHSVar = 5446 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5447 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5448 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5449 }); 5450 const auto *LHSVar = 5451 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5452 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5453 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5454 }); 5455 QualType PrivTy = (*IPriv)->getType(); 5456 if (PrivTy->isVariablyModifiedType()) { 5457 // Get array size and emit VLA type. 
5458 ++Idx; 5459 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5460 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5461 const VariableArrayType *VLA = 5462 CGF.getContext().getAsVariableArrayType(PrivTy); 5463 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5464 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5465 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5466 CGF.EmitVariablyModifiedType(PrivTy); 5467 } 5468 } 5469 Scope.Privatize(); 5470 IPriv = Privates.begin(); 5471 auto ILHS = LHSExprs.begin(); 5472 auto IRHS = RHSExprs.begin(); 5473 for (const Expr *E : ReductionOps) { 5474 if ((*IPriv)->getType()->isArrayType()) { 5475 // Emit reduction for array section. 5476 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5477 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5478 EmitOMPAggregateReduction( 5479 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5480 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5481 emitReductionCombiner(CGF, E); 5482 }); 5483 } else { 5484 // Emit reduction for array subscript or single variable. 5485 emitReductionCombiner(CGF, E); 5486 } 5487 ++IPriv; 5488 ++ILHS; 5489 ++IRHS; 5490 } 5491 Scope.ForceCleanup(); 5492 CGF.FinishFunction(); 5493 return Fn; 5494 } 5495 5496 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5497 const Expr *ReductionOp, 5498 const Expr *PrivateRef, 5499 const DeclRefExpr *LHS, 5500 const DeclRefExpr *RHS) { 5501 if (PrivateRef->getType()->isArrayType()) { 5502 // Emit reduction for array section. 
5503 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5504 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5505 EmitOMPAggregateReduction( 5506 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5507 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5508 emitReductionCombiner(CGF, ReductionOp); 5509 }); 5510 } else { 5511 // Emit reduction for array subscript or single variable. 5512 emitReductionCombiner(CGF, ReductionOp); 5513 } 5514 } 5515 5516 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5517 ArrayRef<const Expr *> Privates, 5518 ArrayRef<const Expr *> LHSExprs, 5519 ArrayRef<const Expr *> RHSExprs, 5520 ArrayRef<const Expr *> ReductionOps, 5521 ReductionOptionsTy Options) { 5522 if (!CGF.HaveInsertPoint()) 5523 return; 5524 5525 bool WithNowait = Options.WithNowait; 5526 bool SimpleReduction = Options.SimpleReduction; 5527 5528 // Next code should be emitted for reduction: 5529 // 5530 // static kmp_critical_name lock = { 0 }; 5531 // 5532 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5533 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5534 // ... 5535 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5536 // *(Type<n>-1*)rhs[<n>-1]); 5537 // } 5538 // 5539 // ... 5540 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5541 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5542 // RedList, reduce_func, &<lock>)) { 5543 // case 1: 5544 // ... 5545 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5546 // ... 5547 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5548 // break; 5549 // case 2: 5550 // ... 5551 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5552 // ... 5553 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5554 // break; 5555 // default:; 5556 // } 5557 // 5558 // if SimpleReduction is true, only the next code is generated: 5559 // ... 
5560 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5561 // ... 5562 5563 ASTContext &C = CGM.getContext(); 5564 5565 if (SimpleReduction) { 5566 CodeGenFunction::RunCleanupsScope Scope(CGF); 5567 auto IPriv = Privates.begin(); 5568 auto ILHS = LHSExprs.begin(); 5569 auto IRHS = RHSExprs.begin(); 5570 for (const Expr *E : ReductionOps) { 5571 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5572 cast<DeclRefExpr>(*IRHS)); 5573 ++IPriv; 5574 ++ILHS; 5575 ++IRHS; 5576 } 5577 return; 5578 } 5579 5580 // 1. Build a list of reduction variables. 5581 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5582 auto Size = RHSExprs.size(); 5583 for (const Expr *E : Privates) { 5584 if (E->getType()->isVariablyModifiedType()) 5585 // Reserve place for array size. 5586 ++Size; 5587 } 5588 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5589 QualType ReductionArrayTy = 5590 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5591 /*IndexTypeQuals=*/0); 5592 Address ReductionList = 5593 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5594 auto IPriv = Privates.begin(); 5595 unsigned Idx = 0; 5596 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5597 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5598 CGF.Builder.CreateStore( 5599 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5600 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5601 Elem); 5602 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5603 // Store array size. 5604 ++Idx; 5605 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5606 llvm::Value *Size = CGF.Builder.CreateIntCast( 5607 CGF.getVLASize( 5608 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5609 .NumElts, 5610 CGF.SizeTy, /*isSigned=*/false); 5611 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5612 Elem); 5613 } 5614 } 5615 5616 // 2. 
Emit reduce_func(). 5617 llvm::Function *ReductionFn = emitReductionFunction( 5618 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5619 LHSExprs, RHSExprs, ReductionOps); 5620 5621 // 3. Create static kmp_critical_name lock = { 0 }; 5622 std::string Name = getName({"reduction"}); 5623 llvm::Value *Lock = getCriticalRegionLock(Name); 5624 5625 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5626 // RedList, reduce_func, &<lock>); 5627 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5628 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5629 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5630 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5631 ReductionList.getPointer(), CGF.VoidPtrTy); 5632 llvm::Value *Args[] = { 5633 IdentTLoc, // ident_t *<loc> 5634 ThreadId, // i32 <gtid> 5635 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5636 ReductionArrayTySize, // size_type sizeof(RedList) 5637 RL, // void *RedList 5638 ReductionFn, // void (*) (void *, void *) <reduce_func> 5639 Lock // kmp_critical_name *&<lock> 5640 }; 5641 llvm::Value *Res = CGF.EmitRuntimeCall( 5642 OMPBuilder.getOrCreateRuntimeFunction( 5643 CGM.getModule(), 5644 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5645 Args); 5646 5647 // 5. Build switch(res) 5648 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5649 llvm::SwitchInst *SwInst = 5650 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5651 5652 // 6. Build case 1: 5653 // ... 5654 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5655 // ... 
5656 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5657 // break; 5658 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5659 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5660 CGF.EmitBlock(Case1BB); 5661 5662 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5663 llvm::Value *EndArgs[] = { 5664 IdentTLoc, // ident_t *<loc> 5665 ThreadId, // i32 <gtid> 5666 Lock // kmp_critical_name *&<lock> 5667 }; 5668 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5669 CodeGenFunction &CGF, PrePostActionTy &Action) { 5670 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5671 auto IPriv = Privates.begin(); 5672 auto ILHS = LHSExprs.begin(); 5673 auto IRHS = RHSExprs.begin(); 5674 for (const Expr *E : ReductionOps) { 5675 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5676 cast<DeclRefExpr>(*IRHS)); 5677 ++IPriv; 5678 ++ILHS; 5679 ++IRHS; 5680 } 5681 }; 5682 RegionCodeGenTy RCG(CodeGen); 5683 CommonActionTy Action( 5684 nullptr, llvm::None, 5685 OMPBuilder.getOrCreateRuntimeFunction( 5686 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5687 : OMPRTL___kmpc_end_reduce), 5688 EndArgs); 5689 RCG.setAction(Action); 5690 RCG(CGF); 5691 5692 CGF.EmitBranch(DefaultBB); 5693 5694 // 7. Build case 2: 5695 // ... 5696 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5697 // ... 
5698 // break; 5699 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5700 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5701 CGF.EmitBlock(Case2BB); 5702 5703 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5704 CodeGenFunction &CGF, PrePostActionTy &Action) { 5705 auto ILHS = LHSExprs.begin(); 5706 auto IRHS = RHSExprs.begin(); 5707 auto IPriv = Privates.begin(); 5708 for (const Expr *E : ReductionOps) { 5709 const Expr *XExpr = nullptr; 5710 const Expr *EExpr = nullptr; 5711 const Expr *UpExpr = nullptr; 5712 BinaryOperatorKind BO = BO_Comma; 5713 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5714 if (BO->getOpcode() == BO_Assign) { 5715 XExpr = BO->getLHS(); 5716 UpExpr = BO->getRHS(); 5717 } 5718 } 5719 // Try to emit update expression as a simple atomic. 5720 const Expr *RHSExpr = UpExpr; 5721 if (RHSExpr) { 5722 // Analyze RHS part of the whole expression. 5723 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5724 RHSExpr->IgnoreParenImpCasts())) { 5725 // If this is a conditional operator, analyze its condition for 5726 // min/max reduction operator. 
5727 RHSExpr = ACO->getCond(); 5728 } 5729 if (const auto *BORHS = 5730 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5731 EExpr = BORHS->getRHS(); 5732 BO = BORHS->getOpcode(); 5733 } 5734 } 5735 if (XExpr) { 5736 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5737 auto &&AtomicRedGen = [BO, VD, 5738 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5739 const Expr *EExpr, const Expr *UpExpr) { 5740 LValue X = CGF.EmitLValue(XExpr); 5741 RValue E; 5742 if (EExpr) 5743 E = CGF.EmitAnyExpr(EExpr); 5744 CGF.EmitOMPAtomicSimpleUpdateExpr( 5745 X, E, BO, /*IsXLHSInRHSPart=*/true, 5746 llvm::AtomicOrdering::Monotonic, Loc, 5747 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5748 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5749 PrivateScope.addPrivate( 5750 VD, [&CGF, VD, XRValue, Loc]() { 5751 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5752 CGF.emitOMPSimpleStore( 5753 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5754 VD->getType().getNonReferenceType(), Loc); 5755 return LHSTemp; 5756 }); 5757 (void)PrivateScope.Privatize(); 5758 return CGF.EmitAnyExpr(UpExpr); 5759 }); 5760 }; 5761 if ((*IPriv)->getType()->isArrayType()) { 5762 // Emit atomic reduction for array section. 5763 const auto *RHSVar = 5764 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5765 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5766 AtomicRedGen, XExpr, EExpr, UpExpr); 5767 } else { 5768 // Emit atomic reduction for array subscript or single variable. 5769 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5770 } 5771 } else { 5772 // Emit as a critical region. 
5773 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5774 const Expr *, const Expr *) { 5775 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5776 std::string Name = RT.getName({"atomic_reduction"}); 5777 RT.emitCriticalRegion( 5778 CGF, Name, 5779 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5780 Action.Enter(CGF); 5781 emitReductionCombiner(CGF, E); 5782 }, 5783 Loc); 5784 }; 5785 if ((*IPriv)->getType()->isArrayType()) { 5786 const auto *LHSVar = 5787 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5788 const auto *RHSVar = 5789 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5790 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5791 CritRedGen); 5792 } else { 5793 CritRedGen(CGF, nullptr, nullptr, nullptr); 5794 } 5795 } 5796 ++ILHS; 5797 ++IRHS; 5798 ++IPriv; 5799 } 5800 }; 5801 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5802 if (!WithNowait) { 5803 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5804 llvm::Value *EndArgs[] = { 5805 IdentTLoc, // ident_t *<loc> 5806 ThreadId, // i32 <gtid> 5807 Lock // kmp_critical_name *&<lock> 5808 }; 5809 CommonActionTy Action(nullptr, llvm::None, 5810 OMPBuilder.getOrCreateRuntimeFunction( 5811 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5812 EndArgs); 5813 AtomicRCG.setAction(Action); 5814 AtomicRCG(CGF); 5815 } else { 5816 AtomicRCG(CGF); 5817 } 5818 5819 CGF.EmitBranch(DefaultBB); 5820 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5821 } 5822 5823 /// Generates unique name for artificial threadprivate variables. 5824 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5825 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5826 const Expr *Ref) { 5827 SmallString<256> Buffer; 5828 llvm::raw_svector_ostream Out(Buffer); 5829 const clang::DeclRefExpr *DE; 5830 const VarDecl *D = ::getBaseDecl(Ref, DE); 5831 if (!D) 5832 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5833 D = D->getCanonicalDecl(); 5834 std::string Name = CGM.getOpenMPRuntime().getName( 5835 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5836 Out << Prefix << Name << "_" 5837 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5838 return std::string(Out.str()); 5839 } 5840 5841 /// Emits reduction initializer function: 5842 /// \code 5843 /// void @.red_init(void* %arg, void* %orig) { 5844 /// %0 = bitcast void* %arg to <type>* 5845 /// store <type> <init>, <type>* %0 5846 /// ret void 5847 /// } 5848 /// \endcode 5849 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5850 SourceLocation Loc, 5851 ReductionCodeGen &RCG, unsigned N) { 5852 ASTContext &C = CGM.getContext(); 5853 QualType VoidPtrTy = C.VoidPtrTy; 5854 VoidPtrTy.addRestrict(); 5855 FunctionArgList Args; 5856 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5857 ImplicitParamDecl::Other); 5858 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5859 ImplicitParamDecl::Other); 5860 Args.emplace_back(&Param); 5861 Args.emplace_back(&ParamOrig); 5862 const auto &FnInfo = 5863 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5864 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5865 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5866 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5867 Name, &CGM.getModule()); 5868 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5869 Fn->setDoesNotRecurse(); 5870 CodeGenFunction CGF(CGM); 5871 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5872 Address PrivateAddr = CGF.EmitLoadOfPointer( 5873 CGF.GetAddrOfLocalVar(&Param), 5874 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5875 llvm::Value *Size = nullptr; 5876 // If the size of the reduction item is non-constant, load it from global 5877 // threadprivate variable. 5878 if (RCG.getSizes(N).second) { 5879 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5880 CGF, CGM.getContext().getSizeType(), 5881 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5882 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5883 CGM.getContext().getSizeType(), Loc); 5884 } 5885 RCG.emitAggregateType(CGF, N, Size); 5886 LValue OrigLVal; 5887 // If initializer uses initializer from declare reduction construct, emit a 5888 // pointer to the address of the original reduction item (reuired by reduction 5889 // initializer) 5890 if (RCG.usesReductionInitializer(N)) { 5891 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5892 SharedAddr = CGF.EmitLoadOfPointer( 5893 SharedAddr, 5894 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5895 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5896 } else { 5897 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5898 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5899 CGM.getContext().VoidPtrTy); 5900 } 5901 // Emit the initializer: 5902 // %0 = bitcast void* %arg to <type>* 5903 // store <type> <init>, <type>* %0 5904 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5905 [](CodeGenFunction &) { return false; }); 5906 CGF.FinishFunction(); 5907 return Fn; 5908 } 5909 5910 /// Emits reduction combiner function: 5911 /// \code 5912 /// void @.red_comb(void* %arg0, void* %arg1) { 5913 /// %lhs = bitcast void* %arg0 to <type>* 5914 /// %rhs = bitcast void* %arg1 to <type>* 5915 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5916 /// store <type> %2, <type>* %lhs 5917 /// 
ret void 5918 /// } 5919 /// \endcode 5920 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5921 SourceLocation Loc, 5922 ReductionCodeGen &RCG, unsigned N, 5923 const Expr *ReductionOp, 5924 const Expr *LHS, const Expr *RHS, 5925 const Expr *PrivateRef) { 5926 ASTContext &C = CGM.getContext(); 5927 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5928 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5929 FunctionArgList Args; 5930 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5931 C.VoidPtrTy, ImplicitParamDecl::Other); 5932 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5933 ImplicitParamDecl::Other); 5934 Args.emplace_back(&ParamInOut); 5935 Args.emplace_back(&ParamIn); 5936 const auto &FnInfo = 5937 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5938 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5939 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5940 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5941 Name, &CGM.getModule()); 5942 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5943 Fn->setDoesNotRecurse(); 5944 CodeGenFunction CGF(CGM); 5945 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5946 llvm::Value *Size = nullptr; 5947 // If the size of the reduction item is non-constant, load it from global 5948 // threadprivate variable. 5949 if (RCG.getSizes(N).second) { 5950 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5951 CGF, CGM.getContext().getSizeType(), 5952 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5953 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5954 CGM.getContext().getSizeType(), Loc); 5955 } 5956 RCG.emitAggregateType(CGF, N, Size); 5957 // Remap lhs and rhs variables to the addresses of the function arguments. 
5958 // %lhs = bitcast void* %arg0 to <type>* 5959 // %rhs = bitcast void* %arg1 to <type>* 5960 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5961 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5962 // Pull out the pointer to the variable. 5963 Address PtrAddr = CGF.EmitLoadOfPointer( 5964 CGF.GetAddrOfLocalVar(&ParamInOut), 5965 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5966 return CGF.Builder.CreateElementBitCast( 5967 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5968 }); 5969 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5970 // Pull out the pointer to the variable. 5971 Address PtrAddr = CGF.EmitLoadOfPointer( 5972 CGF.GetAddrOfLocalVar(&ParamIn), 5973 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5974 return CGF.Builder.CreateElementBitCast( 5975 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5976 }); 5977 PrivateScope.Privatize(); 5978 // Emit the combiner body: 5979 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5980 // store <type> %2, <type>* %lhs 5981 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5982 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5983 cast<DeclRefExpr>(RHS)); 5984 CGF.FinishFunction(); 5985 return Fn; 5986 } 5987 5988 /// Emits reduction finalizer function: 5989 /// \code 5990 /// void @.red_fini(void* %arg) { 5991 /// %0 = bitcast void* %arg to <type>* 5992 /// <destroy>(<type>* %0) 5993 /// ret void 5994 /// } 5995 /// \endcode 5996 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5997 SourceLocation Loc, 5998 ReductionCodeGen &RCG, unsigned N) { 5999 if (!RCG.needCleanups(N)) 6000 return nullptr; 6001 ASTContext &C = CGM.getContext(); 6002 FunctionArgList Args; 6003 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6004 ImplicitParamDecl::Other); 6005 Args.emplace_back(&Param); 6006 const auto &FnInfo = 6007 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6008 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 6009 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6010 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6011 Name, &CGM.getModule()); 6012 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6013 Fn->setDoesNotRecurse(); 6014 CodeGenFunction CGF(CGM); 6015 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6016 Address PrivateAddr = CGF.EmitLoadOfPointer( 6017 CGF.GetAddrOfLocalVar(&Param), 6018 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6019 llvm::Value *Size = nullptr; 6020 // If the size of the reduction item is non-constant, load it from global 6021 // threadprivate variable. 6022 if (RCG.getSizes(N).second) { 6023 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6024 CGF, CGM.getContext().getSizeType(), 6025 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6026 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6027 CGM.getContext().getSizeType(), Loc); 6028 } 6029 RCG.emitAggregateType(CGF, N, Size); 6030 // Emit the finalizer body: 6031 // <destroy>(<type>* %0) 6032 RCG.emitCleanups(CGF, N, PrivateAddr); 6033 CGF.FinishFunction(Loc); 6034 return Fn; 6035 } 6036 6037 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6038 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6039 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6040 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6041 return nullptr; 6042 6043 // Build typedef struct: 6044 // kmp_taskred_input { 6045 // void *reduce_shar; // shared reduction item 6046 // void *reduce_orig; // original reduction item used for initialization 6047 // size_t reduce_size; // size of data item 6048 // void *reduce_init; // data initialization routine 6049 // void *reduce_fini; // data finalization routine 6050 // void *reduce_comb; // data combiner routine 6051 // kmp_task_red_flags_t flags; // 
flags for additional info from compiler 6052 // } kmp_taskred_input_t; 6053 ASTContext &C = CGM.getContext(); 6054 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6055 RD->startDefinition(); 6056 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6057 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6058 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6059 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6060 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6061 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6062 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6063 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6064 RD->completeDefinition(); 6065 QualType RDType = C.getRecordType(RD); 6066 unsigned Size = Data.ReductionVars.size(); 6067 llvm::APInt ArraySize(/*numBits=*/64, Size); 6068 QualType ArrayRDType = C.getConstantArrayType( 6069 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6070 // kmp_task_red_input_t .rd_input.[Size]; 6071 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6072 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6073 Data.ReductionCopies, Data.ReductionOps); 6074 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6075 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6076 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6077 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6078 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6079 TaskRedInput.getPointer(), Idxs, 6080 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6081 ".rd_input.gep."); 6082 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6083 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6084 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6085 RCG.emitSharedOrigLValue(CGF, Cnt); 6086 llvm::Value *CastedShared = 6087 
CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6088 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6089 // ElemLVal.reduce_orig = &Origs[Cnt]; 6090 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6091 llvm::Value *CastedOrig = 6092 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6093 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6094 RCG.emitAggregateType(CGF, Cnt); 6095 llvm::Value *SizeValInChars; 6096 llvm::Value *SizeVal; 6097 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6098 // We use delayed creation/initialization for VLAs and array sections. It is 6099 // required because runtime does not provide the way to pass the sizes of 6100 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6101 // threadprivate global variables are used to store these values and use 6102 // them in the functions. 6103 bool DelayedCreation = !!SizeVal; 6104 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6105 /*isSigned=*/false); 6106 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6107 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6108 // ElemLVal.reduce_init = init; 6109 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6110 llvm::Value *InitAddr = 6111 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6112 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6113 // ElemLVal.reduce_fini = fini; 6114 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6115 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6116 llvm::Value *FiniAddr = Fini 6117 ? 
CGF.EmitCastToVoidPtr(Fini) 6118 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6119 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6120 // ElemLVal.reduce_comb = comb; 6121 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6122 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6123 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6124 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6125 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6126 // ElemLVal.flags = 0; 6127 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6128 if (DelayedCreation) { 6129 CGF.EmitStoreOfScalar( 6130 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6131 FlagsLVal); 6132 } else 6133 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6134 FlagsLVal.getType()); 6135 } 6136 if (Data.IsReductionWithTaskMod) { 6137 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6138 // is_ws, int num, void *data); 6139 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6140 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6141 CGM.IntTy, /*isSigned=*/true); 6142 llvm::Value *Args[] = { 6143 IdentTLoc, GTid, 6144 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0, 6145 /*isSigned=*/true), 6146 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6147 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6148 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6149 return CGF.EmitRuntimeCall( 6150 OMPBuilder.getOrCreateRuntimeFunction( 6151 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6152 Args); 6153 } 6154 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6155 llvm::Value *Args[] = { 6156 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6157 /*isSigned=*/true), 6158 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6159 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6160 CGM.VoidPtrTy)}; 6161 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6162 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6163 Args); 6164 } 6165 6166 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6167 SourceLocation Loc, 6168 bool IsWorksharingReduction) { 6169 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6170 // is_ws, int num, void *data); 6171 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6172 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6173 CGM.IntTy, /*isSigned=*/true); 6174 llvm::Value *Args[] = {IdentTLoc, GTid, 6175 llvm::ConstantInt::get(CGM.IntTy, 6176 IsWorksharingReduction ? 1 : 0, 6177 /*isSigned=*/true)}; 6178 (void)CGF.EmitRuntimeCall( 6179 OMPBuilder.getOrCreateRuntimeFunction( 6180 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6181 Args); 6182 } 6183 6184 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6185 SourceLocation Loc, 6186 ReductionCodeGen &RCG, 6187 unsigned N) { 6188 auto Sizes = RCG.getSizes(N); 6189 // Emit threadprivate global variable if the type is non-constant 6190 // (Sizes.second = nullptr). 
6191 if (Sizes.second) { 6192 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6193 /*isSigned=*/false); 6194 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6195 CGF, CGM.getContext().getSizeType(), 6196 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6197 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6198 } 6199 } 6200 6201 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6202 SourceLocation Loc, 6203 llvm::Value *ReductionsPtr, 6204 LValue SharedLVal) { 6205 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6206 // *d); 6207 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6208 CGM.IntTy, 6209 /*isSigned=*/true), 6210 ReductionsPtr, 6211 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6212 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6213 return Address( 6214 CGF.EmitRuntimeCall( 6215 OMPBuilder.getOrCreateRuntimeFunction( 6216 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6217 Args), 6218 SharedLVal.getAlignment()); 6219 } 6220 6221 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6222 SourceLocation Loc) { 6223 if (!CGF.HaveInsertPoint()) 6224 return; 6225 6226 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 6227 OMPBuilder.CreateTaskwait(CGF.Builder); 6228 } else { 6229 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6230 // global_tid); 6231 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6232 // Ignore return result until untied tasks are supported. 
6233 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6234 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6235 Args); 6236 } 6237 6238 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6239 Region->emitUntiedSwitch(CGF); 6240 } 6241 6242 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6243 OpenMPDirectiveKind InnerKind, 6244 const RegionCodeGenTy &CodeGen, 6245 bool HasCancel) { 6246 if (!CGF.HaveInsertPoint()) 6247 return; 6248 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6249 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6250 } 6251 6252 namespace { 6253 enum RTCancelKind { 6254 CancelNoreq = 0, 6255 CancelParallel = 1, 6256 CancelLoop = 2, 6257 CancelSections = 3, 6258 CancelTaskgroup = 4 6259 }; 6260 } // anonymous namespace 6261 6262 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6263 RTCancelKind CancelKind = CancelNoreq; 6264 if (CancelRegion == OMPD_parallel) 6265 CancelKind = CancelParallel; 6266 else if (CancelRegion == OMPD_for) 6267 CancelKind = CancelLoop; 6268 else if (CancelRegion == OMPD_sections) 6269 CancelKind = CancelSections; 6270 else { 6271 assert(CancelRegion == OMPD_taskgroup); 6272 CancelKind = CancelTaskgroup; 6273 } 6274 return CancelKind; 6275 } 6276 6277 void CGOpenMPRuntime::emitCancellationPointCall( 6278 CodeGenFunction &CGF, SourceLocation Loc, 6279 OpenMPDirectiveKind CancelRegion) { 6280 if (!CGF.HaveInsertPoint()) 6281 return; 6282 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6283 // global_tid, kmp_int32 cncl_kind); 6284 if (auto *OMPRegionInfo = 6285 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6286 // For 'cancellation point taskgroup', the task region info may not have a 6287 // cancel. This may instead happen in another adjacent task. 
6288 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6289 llvm::Value *Args[] = { 6290 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6291 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6292 // Ignore return result until untied tasks are supported. 6293 llvm::Value *Result = CGF.EmitRuntimeCall( 6294 OMPBuilder.getOrCreateRuntimeFunction( 6295 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6296 Args); 6297 // if (__kmpc_cancellationpoint()) { 6298 // exit from construct; 6299 // } 6300 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6301 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6302 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6303 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6304 CGF.EmitBlock(ExitBB); 6305 // exit from construct; 6306 CodeGenFunction::JumpDest CancelDest = 6307 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6308 CGF.EmitBranchThroughCleanup(CancelDest); 6309 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6310 } 6311 } 6312 } 6313 6314 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6315 const Expr *IfCond, 6316 OpenMPDirectiveKind CancelRegion) { 6317 if (!CGF.HaveInsertPoint()) 6318 return; 6319 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6320 // kmp_int32 cncl_kind); 6321 auto &M = CGM.getModule(); 6322 if (auto *OMPRegionInfo = 6323 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6324 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6325 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6326 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6327 llvm::Value *Args[] = { 6328 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6329 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6330 // Ignore return result until untied tasks are supported. 
6331 llvm::Value *Result = CGF.EmitRuntimeCall( 6332 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6333 // if (__kmpc_cancel()) { 6334 // exit from construct; 6335 // } 6336 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6337 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6338 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6339 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6340 CGF.EmitBlock(ExitBB); 6341 // exit from construct; 6342 CodeGenFunction::JumpDest CancelDest = 6343 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6344 CGF.EmitBranchThroughCleanup(CancelDest); 6345 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6346 }; 6347 if (IfCond) { 6348 emitIfClause(CGF, IfCond, ThenGen, 6349 [](CodeGenFunction &, PrePostActionTy &) {}); 6350 } else { 6351 RegionCodeGenTy ThenRCG(ThenGen); 6352 ThenRCG(CGF); 6353 } 6354 } 6355 } 6356 6357 namespace { 6358 /// Cleanup action for uses_allocators support. 
6359 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6360 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6361 6362 public: 6363 OMPUsesAllocatorsActionTy( 6364 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6365 : Allocators(Allocators) {} 6366 void Enter(CodeGenFunction &CGF) override { 6367 if (!CGF.HaveInsertPoint()) 6368 return; 6369 for (const auto &AllocatorData : Allocators) { 6370 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6371 CGF, AllocatorData.first, AllocatorData.second); 6372 } 6373 } 6374 void Exit(CodeGenFunction &CGF) override { 6375 if (!CGF.HaveInsertPoint()) 6376 return; 6377 for (const auto &AllocatorData : Allocators) { 6378 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6379 AllocatorData.first); 6380 } 6381 } 6382 }; 6383 } // namespace 6384 6385 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6386 const OMPExecutableDirective &D, StringRef ParentName, 6387 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6388 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6389 assert(!ParentName.empty() && "Invalid target region parent name!"); 6390 HasEmittedTargetRegion = true; 6391 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6392 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6393 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6394 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6395 if (!D.AllocatorTraits) 6396 continue; 6397 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6398 } 6399 } 6400 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6401 CodeGen.setAction(UsesAllocatorAction); 6402 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6403 IsOffloadEntry, CodeGen); 6404 } 6405 6406 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6407 const Expr *Allocator, 6408 const Expr *AllocatorTraits) { 6409 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6410 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6411 // Use default memspace handle. 6412 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6413 llvm::Value *NumTraits = llvm::ConstantInt::get( 6414 CGF.IntTy, cast<ConstantArrayType>( 6415 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6416 ->getSize() 6417 .getLimitedValue()); 6418 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6419 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6420 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6421 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6422 AllocatorTraitsLVal.getBaseInfo(), 6423 AllocatorTraitsLVal.getTBAAInfo()); 6424 llvm::Value *Traits = 6425 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6426 6427 llvm::Value *AllocatorVal = 6428 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6429 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6430 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6431 // Store to allocator. 
6432 CGF.EmitVarDecl(*cast<VarDecl>( 6433 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 6434 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6435 AllocatorVal = 6436 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 6437 Allocator->getType(), Allocator->getExprLoc()); 6438 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 6439 } 6440 6441 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 6442 const Expr *Allocator) { 6443 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 6444 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6445 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 6446 llvm::Value *AllocatorVal = 6447 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 6448 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 6449 CGF.getContext().VoidPtrTy, 6450 Allocator->getExprLoc()); 6451 (void)CGF.EmitRuntimeCall( 6452 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 6453 OMPRTL___kmpc_destroy_allocator), 6454 {ThreadId, AllocatorVal}); 6455 } 6456 6457 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6458 const OMPExecutableDirective &D, StringRef ParentName, 6459 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6460 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6461 // Create a unique name for the entry function using the source location 6462 // information of the current target region. The name will be something like: 6463 // 6464 // __omp_offloading_DD_FFFF_PP_lBB 6465 // 6466 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6467 // mangled name of the function that encloses the target region and BB is the 6468 // line number of the target region. 
6469 6470 unsigned DeviceID; 6471 unsigned FileID; 6472 unsigned Line; 6473 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6474 Line); 6475 SmallString<64> EntryFnName; 6476 { 6477 llvm::raw_svector_ostream OS(EntryFnName); 6478 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6479 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6480 } 6481 6482 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6483 6484 CodeGenFunction CGF(CGM, true); 6485 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6486 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6487 6488 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6489 6490 // If this target outline function is not an offload entry, we don't need to 6491 // register it. 6492 if (!IsOffloadEntry) 6493 return; 6494 6495 // The target region ID is used by the runtime library to identify the current 6496 // target region, so it only has to be unique and not necessarily point to 6497 // anything. It could be the pointer to the outlined function that implements 6498 // the target region, but we aren't using that so that the compiler doesn't 6499 // need to keep that, and could therefore inline the host function if proven 6500 // worthwhile during optimization. In the other hand, if emitting code for the 6501 // device, the ID has to be the function address so that it can retrieved from 6502 // the offloading entry and launched by the runtime library. We also mark the 6503 // outlined function to have external linkage in case we are emitting code for 6504 // the device, because these functions will be entry points to the device. 
6505 6506 if (CGM.getLangOpts().OpenMPIsDevice) { 6507 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6508 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6509 OutlinedFn->setDSOLocal(false); 6510 } else { 6511 std::string Name = getName({EntryFnName, "region_id"}); 6512 OutlinedFnID = new llvm::GlobalVariable( 6513 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6514 llvm::GlobalValue::WeakAnyLinkage, 6515 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6516 } 6517 6518 // Register the information for the entry associated with this target region. 6519 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6520 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6521 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6522 } 6523 6524 /// Checks if the expression is constant or does not have non-trivial function 6525 /// calls. 6526 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6527 // We can skip constant expressions. 6528 // We can skip expressions with trivial calls or simple expressions. 6529 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6530 !E->hasNonTrivialCall(Ctx)) && 6531 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6532 } 6533 6534 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6535 const Stmt *Body) { 6536 const Stmt *Child = Body->IgnoreContainers(); 6537 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6538 Child = nullptr; 6539 for (const Stmt *S : C->body()) { 6540 if (const auto *E = dyn_cast<Expr>(S)) { 6541 if (isTrivial(Ctx, E)) 6542 continue; 6543 } 6544 // Some of the statements can be ignored. 6545 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6546 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6547 continue; 6548 // Analyze declarations. 
6549 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6550 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6551 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6552 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6553 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6554 isa<UsingDirectiveDecl>(D) || 6555 isa<OMPDeclareReductionDecl>(D) || 6556 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6557 return true; 6558 const auto *VD = dyn_cast<VarDecl>(D); 6559 if (!VD) 6560 return false; 6561 return VD->isConstexpr() || 6562 ((VD->getType().isTrivialType(Ctx) || 6563 VD->getType()->isReferenceType()) && 6564 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6565 })) 6566 continue; 6567 } 6568 // Found multiple children - cannot get the one child only. 6569 if (Child) 6570 return nullptr; 6571 Child = S; 6572 } 6573 if (Child) 6574 Child = Child->IgnoreContainers(); 6575 } 6576 return Child; 6577 } 6578 6579 /// Emit the number of teams for a target directive. Inspect the num_teams 6580 /// clause associated with a teams construct combined or closely nested 6581 /// with the target directive. 6582 /// 6583 /// Emit a team of size one for directives such as 'target parallel' that 6584 /// have no associated teams construct. 6585 /// 6586 /// Otherwise, return nullptr. 
6587 static llvm::Value * 6588 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6589 const OMPExecutableDirective &D) { 6590 assert(!CGF.getLangOpts().OpenMPIsDevice && 6591 "Clauses associated with the teams directive expected to be emitted " 6592 "only for the host!"); 6593 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6594 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6595 "Expected target-based executable directive."); 6596 CGBuilderTy &Bld = CGF.Builder; 6597 switch (DirectiveKind) { 6598 case OMPD_target: { 6599 const auto *CS = D.getInnermostCapturedStmt(); 6600 const auto *Body = 6601 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6602 const Stmt *ChildStmt = 6603 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6604 if (const auto *NestedDir = 6605 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6606 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6607 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6608 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6609 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6610 const Expr *NumTeams = 6611 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6612 llvm::Value *NumTeamsVal = 6613 CGF.EmitScalarExpr(NumTeams, 6614 /*IgnoreResultAssign*/ true); 6615 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6616 /*isSigned=*/true); 6617 } 6618 return Bld.getInt32(0); 6619 } 6620 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6621 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6622 return Bld.getInt32(1); 6623 return Bld.getInt32(0); 6624 } 6625 return nullptr; 6626 } 6627 case OMPD_target_teams: 6628 case OMPD_target_teams_distribute: 6629 case OMPD_target_teams_distribute_simd: 6630 case OMPD_target_teams_distribute_parallel_for: 6631 case OMPD_target_teams_distribute_parallel_for_simd: { 6632 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6633 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6634 const Expr *NumTeams = 6635 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6636 llvm::Value *NumTeamsVal = 6637 CGF.EmitScalarExpr(NumTeams, 6638 /*IgnoreResultAssign*/ true); 6639 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6640 /*isSigned=*/true); 6641 } 6642 return Bld.getInt32(0); 6643 } 6644 case OMPD_target_parallel: 6645 case OMPD_target_parallel_for: 6646 case OMPD_target_parallel_for_simd: 6647 case OMPD_target_simd: 6648 return Bld.getInt32(1); 6649 case OMPD_parallel: 6650 case OMPD_for: 6651 case OMPD_parallel_for: 6652 case OMPD_parallel_master: 6653 case OMPD_parallel_sections: 6654 case OMPD_for_simd: 6655 case OMPD_parallel_for_simd: 6656 case OMPD_cancel: 6657 case OMPD_cancellation_point: 6658 case OMPD_ordered: 6659 case OMPD_threadprivate: 6660 case OMPD_allocate: 6661 case OMPD_task: 6662 case OMPD_simd: 6663 case OMPD_sections: 6664 case OMPD_section: 6665 case OMPD_single: 6666 case OMPD_master: 6667 case OMPD_critical: 6668 case OMPD_taskyield: 6669 case OMPD_barrier: 6670 case OMPD_taskwait: 6671 case OMPD_taskgroup: 6672 case OMPD_atomic: 6673 case OMPD_flush: 6674 case OMPD_depobj: 6675 case OMPD_scan: 6676 case OMPD_teams: 6677 case OMPD_target_data: 6678 case OMPD_target_exit_data: 6679 case OMPD_target_enter_data: 6680 case OMPD_distribute: 6681 case OMPD_distribute_simd: 6682 case OMPD_distribute_parallel_for: 6683 case OMPD_distribute_parallel_for_simd: 6684 case OMPD_teams_distribute: 6685 case OMPD_teams_distribute_simd: 6686 case OMPD_teams_distribute_parallel_for: 6687 case OMPD_teams_distribute_parallel_for_simd: 6688 case OMPD_target_update: 6689 case OMPD_declare_simd: 6690 case OMPD_declare_variant: 6691 case OMPD_begin_declare_variant: 6692 case OMPD_end_declare_variant: 6693 case OMPD_declare_target: 6694 case OMPD_end_declare_target: 6695 case OMPD_declare_reduction: 6696 case OMPD_declare_mapper: 6697 case OMPD_taskloop: 6698 case OMPD_taskloop_simd: 6699 case OMPD_master_taskloop: 
6700 case OMPD_master_taskloop_simd: 6701 case OMPD_parallel_master_taskloop: 6702 case OMPD_parallel_master_taskloop_simd: 6703 case OMPD_requires: 6704 case OMPD_unknown: 6705 break; 6706 default: 6707 break; 6708 } 6709 llvm_unreachable("Unexpected directive kind."); 6710 } 6711 6712 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6713 llvm::Value *DefaultThreadLimitVal) { 6714 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6715 CGF.getContext(), CS->getCapturedStmt()); 6716 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6717 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6718 llvm::Value *NumThreads = nullptr; 6719 llvm::Value *CondVal = nullptr; 6720 // Handle if clause. If if clause present, the number of threads is 6721 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6722 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6723 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6724 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6725 const OMPIfClause *IfClause = nullptr; 6726 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6727 if (C->getNameModifier() == OMPD_unknown || 6728 C->getNameModifier() == OMPD_parallel) { 6729 IfClause = C; 6730 break; 6731 } 6732 } 6733 if (IfClause) { 6734 const Expr *Cond = IfClause->getCondition(); 6735 bool Result; 6736 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6737 if (!Result) 6738 return CGF.Builder.getInt32(1); 6739 } else { 6740 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6741 if (const auto *PreInit = 6742 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6743 for (const auto *I : PreInit->decls()) { 6744 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6745 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6746 } else { 6747 CodeGenFunction::AutoVarEmission Emission = 6748 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6749 CGF.EmitAutoVarCleanups(Emission); 6750 } 6751 } 6752 } 6753 
CondVal = CGF.EvaluateExprAsBool(Cond); 6754 } 6755 } 6756 } 6757 // Check the value of num_threads clause iff if clause was not specified 6758 // or is not evaluated to false. 6759 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6760 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6761 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6762 const auto *NumThreadsClause = 6763 Dir->getSingleClause<OMPNumThreadsClause>(); 6764 CodeGenFunction::LexicalScope Scope( 6765 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6766 if (const auto *PreInit = 6767 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6768 for (const auto *I : PreInit->decls()) { 6769 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6770 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6771 } else { 6772 CodeGenFunction::AutoVarEmission Emission = 6773 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6774 CGF.EmitAutoVarCleanups(Emission); 6775 } 6776 } 6777 } 6778 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6779 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6780 /*isSigned=*/false); 6781 if (DefaultThreadLimitVal) 6782 NumThreads = CGF.Builder.CreateSelect( 6783 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6784 DefaultThreadLimitVal, NumThreads); 6785 } else { 6786 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6787 : CGF.Builder.getInt32(0); 6788 } 6789 // Process condition of the if clause. 6790 if (CondVal) { 6791 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6792 CGF.Builder.getInt32(1)); 6793 } 6794 return NumThreads; 6795 } 6796 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6797 return CGF.Builder.getInt32(1); 6798 return DefaultThreadLimitVal; 6799 } 6800 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6801 : CGF.Builder.getInt32(0); 6802 } 6803 6804 /// Emit the number of threads for a target directive. 
Inspect the 6805 /// thread_limit clause associated with a teams construct combined or closely 6806 /// nested with the target directive. 6807 /// 6808 /// Emit the num_threads clause for directives such as 'target parallel' that 6809 /// have no associated teams construct. 6810 /// 6811 /// Otherwise, return nullptr. 6812 static llvm::Value * 6813 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6814 const OMPExecutableDirective &D) { 6815 assert(!CGF.getLangOpts().OpenMPIsDevice && 6816 "Clauses associated with the teams directive expected to be emitted " 6817 "only for the host!"); 6818 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6819 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6820 "Expected target-based executable directive."); 6821 CGBuilderTy &Bld = CGF.Builder; 6822 llvm::Value *ThreadLimitVal = nullptr; 6823 llvm::Value *NumThreadsVal = nullptr; 6824 switch (DirectiveKind) { 6825 case OMPD_target: { 6826 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6827 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6828 return NumThreads; 6829 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6830 CGF.getContext(), CS->getCapturedStmt()); 6831 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6832 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6833 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6834 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6835 const auto *ThreadLimitClause = 6836 Dir->getSingleClause<OMPThreadLimitClause>(); 6837 CodeGenFunction::LexicalScope Scope( 6838 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6839 if (const auto *PreInit = 6840 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6841 for (const auto *I : PreInit->decls()) { 6842 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6843 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6844 } else { 6845 CodeGenFunction::AutoVarEmission Emission = 6846 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6847 CGF.EmitAutoVarCleanups(Emission); 6848 } 6849 } 6850 } 6851 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6852 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6853 ThreadLimitVal = 6854 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6855 } 6856 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6857 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6858 CS = Dir->getInnermostCapturedStmt(); 6859 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6860 CGF.getContext(), CS->getCapturedStmt()); 6861 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6862 } 6863 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6864 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6865 CS = Dir->getInnermostCapturedStmt(); 6866 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6867 return NumThreads; 6868 } 6869 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6870 return Bld.getInt32(1); 6871 } 6872 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6873 } 6874 case OMPD_target_teams: { 6875 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6876 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6877 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6878 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6879 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6880 ThreadLimitVal = 6881 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6882 } 6883 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6884 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6885 return NumThreads; 6886 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6887 CGF.getContext(), CS->getCapturedStmt()); 6888 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6889 if (Dir->getDirectiveKind() == OMPD_distribute) { 6890 CS = Dir->getInnermostCapturedStmt(); 6891 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6892 return NumThreads; 6893 } 6894 } 6895 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6896 } 6897 case OMPD_target_teams_distribute: 6898 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6899 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6900 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6901 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6902 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6903 ThreadLimitVal = 6904 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6905 } 6906 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6907 case OMPD_target_parallel: 6908 case OMPD_target_parallel_for: 6909 case OMPD_target_parallel_for_simd: 6910 case OMPD_target_teams_distribute_parallel_for: 6911 case OMPD_target_teams_distribute_parallel_for_simd: { 6912 llvm::Value *CondVal = nullptr; 6913 // Handle if clause. 
If if clause present, the number of threads is 6914 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6915 if (D.hasClausesOfKind<OMPIfClause>()) { 6916 const OMPIfClause *IfClause = nullptr; 6917 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6918 if (C->getNameModifier() == OMPD_unknown || 6919 C->getNameModifier() == OMPD_parallel) { 6920 IfClause = C; 6921 break; 6922 } 6923 } 6924 if (IfClause) { 6925 const Expr *Cond = IfClause->getCondition(); 6926 bool Result; 6927 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6928 if (!Result) 6929 return Bld.getInt32(1); 6930 } else { 6931 CodeGenFunction::RunCleanupsScope Scope(CGF); 6932 CondVal = CGF.EvaluateExprAsBool(Cond); 6933 } 6934 } 6935 } 6936 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6937 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6938 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6939 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6940 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6941 ThreadLimitVal = 6942 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6943 } 6944 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6945 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6946 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6947 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6948 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6949 NumThreadsVal = 6950 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6951 ThreadLimitVal = ThreadLimitVal 6952 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6953 ThreadLimitVal), 6954 NumThreadsVal, ThreadLimitVal) 6955 : NumThreadsVal; 6956 } 6957 if (!ThreadLimitVal) 6958 ThreadLimitVal = Bld.getInt32(0); 6959 if (CondVal) 6960 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6961 return ThreadLimitVal; 6962 } 6963 case OMPD_target_teams_distribute_simd: 6964 case OMPD_target_simd: 6965 return Bld.getInt32(1); 6966 case OMPD_parallel: 6967 case OMPD_for: 6968 case OMPD_parallel_for: 6969 case OMPD_parallel_master: 6970 case OMPD_parallel_sections: 6971 case OMPD_for_simd: 6972 case OMPD_parallel_for_simd: 6973 case OMPD_cancel: 6974 case OMPD_cancellation_point: 6975 case OMPD_ordered: 6976 case OMPD_threadprivate: 6977 case OMPD_allocate: 6978 case OMPD_task: 6979 case OMPD_simd: 6980 case OMPD_sections: 6981 case OMPD_section: 6982 case OMPD_single: 6983 case OMPD_master: 6984 case OMPD_critical: 6985 case OMPD_taskyield: 6986 case OMPD_barrier: 6987 case OMPD_taskwait: 6988 case OMPD_taskgroup: 6989 case OMPD_atomic: 6990 case OMPD_flush: 6991 case OMPD_depobj: 6992 case OMPD_scan: 6993 case OMPD_teams: 6994 case OMPD_target_data: 6995 case OMPD_target_exit_data: 6996 case OMPD_target_enter_data: 6997 case OMPD_distribute: 6998 case OMPD_distribute_simd: 6999 case OMPD_distribute_parallel_for: 7000 case OMPD_distribute_parallel_for_simd: 7001 case OMPD_teams_distribute: 7002 case OMPD_teams_distribute_simd: 7003 case OMPD_teams_distribute_parallel_for: 7004 case OMPD_teams_distribute_parallel_for_simd: 7005 case OMPD_target_update: 7006 case OMPD_declare_simd: 7007 case OMPD_declare_variant: 7008 case OMPD_begin_declare_variant: 7009 case OMPD_end_declare_variant: 7010 case OMPD_declare_target: 7011 case OMPD_end_declare_target: 7012 case OMPD_declare_reduction: 7013 case OMPD_declare_mapper: 7014 case OMPD_taskloop: 7015 case OMPD_taskloop_simd: 7016 case OMPD_master_taskloop: 7017 case OMPD_master_taskloop_simd: 7018 case 
OMPD_parallel_master_taskloop: 7019 case OMPD_parallel_master_taskloop_simd: 7020 case OMPD_requires: 7021 case OMPD_unknown: 7022 break; 7023 default: 7024 break; 7025 } 7026 llvm_unreachable("Unsupported directive kind."); 7027 } 7028 7029 namespace { 7030 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7031 7032 // Utility to handle information from clauses associated with a given 7033 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7034 // It provides a convenient interface to obtain the information and generate 7035 // code for that information. 7036 class MappableExprsHandler { 7037 public: 7038 /// Values for bit flags used to specify the mapping type for 7039 /// offloading. 7040 enum OpenMPOffloadMappingFlags : uint64_t { 7041 /// No flags 7042 OMP_MAP_NONE = 0x0, 7043 /// Allocate memory on the device and move data from host to device. 7044 OMP_MAP_TO = 0x01, 7045 /// Allocate memory on the device and move data from device to host. 7046 OMP_MAP_FROM = 0x02, 7047 /// Always perform the requested mapping action on the element, even 7048 /// if it was already mapped before. 7049 OMP_MAP_ALWAYS = 0x04, 7050 /// Delete the element from the device environment, ignoring the 7051 /// current reference count associated with the element. 7052 OMP_MAP_DELETE = 0x08, 7053 /// The element being mapped is a pointer-pointee pair; both the 7054 /// pointer and the pointee should be mapped. 7055 OMP_MAP_PTR_AND_OBJ = 0x10, 7056 /// This flags signals that the base address of an entry should be 7057 /// passed to the target kernel as an argument. 7058 OMP_MAP_TARGET_PARAM = 0x20, 7059 /// Signal that the runtime library has to return the device pointer 7060 /// in the current position for the data being mapped. Used when we have the 7061 /// use_device_ptr or use_device_addr clause. 7062 OMP_MAP_RETURN_PARAM = 0x40, 7063 /// This flag signals that the reference being passed is a pointer to 7064 /// private data. 
7065 OMP_MAP_PRIVATE = 0x80, 7066 /// Pass the element to the device by value. 7067 OMP_MAP_LITERAL = 0x100, 7068 /// Implicit map 7069 OMP_MAP_IMPLICIT = 0x200, 7070 /// Close is a hint to the runtime to allocate memory close to 7071 /// the target device. 7072 OMP_MAP_CLOSE = 0x400, 7073 /// 0x800 is reserved for compatibility with XLC. 7074 /// Produce a runtime error if the data is not already allocated. 7075 OMP_MAP_PRESENT = 0x1000, 7076 /// The 16 MSBs of the flags indicate whether the entry is member of some 7077 /// struct/class. 7078 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7079 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7080 }; 7081 7082 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7083 static unsigned getFlagMemberOffset() { 7084 unsigned Offset = 0; 7085 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7086 Remain = Remain >> 1) 7087 Offset++; 7088 return Offset; 7089 } 7090 7091 /// Class that associates information with a base pointer to be passed to the 7092 /// runtime library. 7093 class BasePointerInfo { 7094 /// The base pointer. 7095 llvm::Value *Ptr = nullptr; 7096 /// The base declaration that refers to this device pointer, or null if 7097 /// there is none. 
7098 const ValueDecl *DevPtrDecl = nullptr; 7099 7100 public: 7101 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7102 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7103 llvm::Value *operator*() const { return Ptr; } 7104 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7105 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7106 }; 7107 7108 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7109 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7110 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7111 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; 7112 7113 /// This structure contains combined information generated for mappable 7114 /// clauses, including base pointers, pointers, sizes, map types, and 7115 /// user-defined mappers. 7116 struct MapCombinedInfoTy { 7117 MapBaseValuesArrayTy BasePointers; 7118 MapValuesArrayTy Pointers; 7119 MapValuesArrayTy Sizes; 7120 MapFlagsArrayTy Types; 7121 MapMappersArrayTy Mappers; 7122 7123 /// Append arrays in \a CurInfo. 7124 void append(MapCombinedInfoTy &CurInfo) { 7125 BasePointers.append(CurInfo.BasePointers.begin(), 7126 CurInfo.BasePointers.end()); 7127 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); 7128 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); 7129 Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); 7130 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); 7131 } 7132 }; 7133 7134 /// Map between a struct and the its lowest & highest elements which have been 7135 /// mapped. 
7136 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7137 /// HE(FieldIndex, Pointer)} 7138 struct StructRangeInfoTy { 7139 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7140 0, Address::invalid()}; 7141 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7142 0, Address::invalid()}; 7143 Address Base = Address::invalid(); 7144 }; 7145 7146 private: 7147 /// Kind that defines how a device pointer has to be returned. 7148 struct MapInfo { 7149 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7150 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7151 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7152 ArrayRef<OpenMPMotionModifierKind> MotionModifiers; 7153 bool ReturnDevicePointer = false; 7154 bool IsImplicit = false; 7155 const ValueDecl *Mapper = nullptr; 7156 bool ForDeviceAddr = false; 7157 7158 MapInfo() = default; 7159 MapInfo( 7160 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7161 OpenMPMapClauseKind MapType, 7162 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7163 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7164 bool ReturnDevicePointer, bool IsImplicit, 7165 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false) 7166 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7167 MotionModifiers(MotionModifiers), 7168 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit), 7169 Mapper(Mapper), ForDeviceAddr(ForDeviceAddr) {} 7170 }; 7171 7172 /// If use_device_ptr or use_device_addr is used on a decl which is a struct 7173 /// member and there is no map information about it, then emission of that 7174 /// entry is deferred until the whole struct has been processed. 
7175 struct DeferredDevicePtrEntryTy { 7176 const Expr *IE = nullptr; 7177 const ValueDecl *VD = nullptr; 7178 bool ForDeviceAddr = false; 7179 7180 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD, 7181 bool ForDeviceAddr) 7182 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {} 7183 }; 7184 7185 /// The target directive from where the mappable clauses were extracted. It 7186 /// is either a executable directive or a user-defined mapper directive. 7187 llvm::PointerUnion<const OMPExecutableDirective *, 7188 const OMPDeclareMapperDecl *> 7189 CurDir; 7190 7191 /// Function the directive is being generated for. 7192 CodeGenFunction &CGF; 7193 7194 /// Set of all first private variables in the current directive. 7195 /// bool data is set to true if the variable is implicitly marked as 7196 /// firstprivate, false otherwise. 7197 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7198 7199 /// Map between device pointer declarations and their expression components. 7200 /// The key value for declarations in 'this' is null. 7201 llvm::DenseMap< 7202 const ValueDecl *, 7203 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7204 DevPointersMap; 7205 7206 llvm::Value *getExprTypeSize(const Expr *E) const { 7207 QualType ExprTy = E->getType().getCanonicalType(); 7208 7209 // Calculate the size for array shaping expression. 7210 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7211 llvm::Value *Size = 7212 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7213 for (const Expr *SE : OAE->getDimensions()) { 7214 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7215 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7216 CGF.getContext().getSizeType(), 7217 SE->getExprLoc()); 7218 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7219 } 7220 return Size; 7221 } 7222 7223 // Reference types are ignored for mapping purposes. 
7224 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7225 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7226 7227 // Given that an array section is considered a built-in type, we need to 7228 // do the calculation based on the length of the section instead of relying 7229 // on CGF.getTypeSize(E->getType()). 7230 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7231 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7232 OAE->getBase()->IgnoreParenImpCasts()) 7233 .getCanonicalType(); 7234 7235 // If there is no length associated with the expression and lower bound is 7236 // not specified too, that means we are using the whole length of the 7237 // base. 7238 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7239 !OAE->getLowerBound()) 7240 return CGF.getTypeSize(BaseTy); 7241 7242 llvm::Value *ElemSize; 7243 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7244 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7245 } else { 7246 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7247 assert(ATy && "Expecting array type if not a pointer type."); 7248 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7249 } 7250 7251 // If we don't have a length at this point, that is because we have an 7252 // array section with a single element. 
7253 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid()) 7254 return ElemSize; 7255 7256 if (const Expr *LenExpr = OAE->getLength()) { 7257 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7258 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7259 CGF.getContext().getSizeType(), 7260 LenExpr->getExprLoc()); 7261 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7262 } 7263 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() && 7264 OAE->getLowerBound() && "expected array_section[lb:]."); 7265 // Size = sizetype - lb * elemtype; 7266 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7267 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7268 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7269 CGF.getContext().getSizeType(), 7270 OAE->getLowerBound()->getExprLoc()); 7271 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7272 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7273 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7274 LengthVal = CGF.Builder.CreateSelect( 7275 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7276 return LengthVal; 7277 } 7278 return CGF.getTypeSize(ExprTy); 7279 } 7280 7281 /// Return the corresponding bits for a given map clause modifier. Add 7282 /// a flag marking the map as a pointer if requested. Add a flag marking the 7283 /// map as the first one of a series of maps that relate to the same map 7284 /// expression. 7285 OpenMPOffloadMappingFlags getMapTypeBits( 7286 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7287 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7288 bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7289 OpenMPOffloadMappingFlags Bits = 7290 IsImplicit ? 
OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7291 switch (MapType) { 7292 case OMPC_MAP_alloc: 7293 case OMPC_MAP_release: 7294 // alloc and release is the default behavior in the runtime library, i.e. 7295 // if we don't pass any bits alloc/release that is what the runtime is 7296 // going to do. Therefore, we don't need to signal anything for these two 7297 // type modifiers. 7298 break; 7299 case OMPC_MAP_to: 7300 Bits |= OMP_MAP_TO; 7301 break; 7302 case OMPC_MAP_from: 7303 Bits |= OMP_MAP_FROM; 7304 break; 7305 case OMPC_MAP_tofrom: 7306 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7307 break; 7308 case OMPC_MAP_delete: 7309 Bits |= OMP_MAP_DELETE; 7310 break; 7311 case OMPC_MAP_unknown: 7312 llvm_unreachable("Unexpected map type!"); 7313 } 7314 if (AddPtrFlag) 7315 Bits |= OMP_MAP_PTR_AND_OBJ; 7316 if (AddIsTargetParamFlag) 7317 Bits |= OMP_MAP_TARGET_PARAM; 7318 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7319 != MapModifiers.end()) 7320 Bits |= OMP_MAP_ALWAYS; 7321 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7322 != MapModifiers.end()) 7323 Bits |= OMP_MAP_CLOSE; 7324 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) 7325 != MapModifiers.end()) 7326 Bits |= OMP_MAP_PRESENT; 7327 if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) 7328 != MotionModifiers.end()) 7329 Bits |= OMP_MAP_PRESENT; 7330 return Bits; 7331 } 7332 7333 /// Return true if the provided expression is a final array section. A 7334 /// final array section, is one whose length can't be proved to be one. 7335 bool isFinalArraySectionExpression(const Expr *E) const { 7336 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7337 7338 // It is not an array section and therefore not a unity-size one. 7339 if (!OASE) 7340 return false; 7341 7342 // An array section with no colon always refer to a single element. 
7343 if (OASE->getColonLocFirst().isInvalid()) 7344 return false; 7345 7346 const Expr *Length = OASE->getLength(); 7347 7348 // If we don't have a length we have to check if the array has size 1 7349 // for this dimension. Also, we should always expect a length if the 7350 // base type is pointer. 7351 if (!Length) { 7352 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7353 OASE->getBase()->IgnoreParenImpCasts()) 7354 .getCanonicalType(); 7355 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7356 return ATy->getSize().getSExtValue() != 1; 7357 // If we don't have a constant dimension length, we have to consider 7358 // the current section as having any size, so it is not necessarily 7359 // unitary. If it happen to be unity size, that's user fault. 7360 return true; 7361 } 7362 7363 // Check if the length evaluates to 1. 7364 Expr::EvalResult Result; 7365 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7366 return true; // Can have more that size 1. 7367 7368 llvm::APSInt ConstLength = Result.Val.getInt(); 7369 return ConstLength.getSExtValue() != 1; 7370 } 7371 7372 /// Generate the base pointers, section pointers, sizes, map type bits, and 7373 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7374 /// map type, map or motion modifiers, and expression components. 7375 /// \a IsFirstComponent should be set to true if the provided set of 7376 /// components is the first associated with a capture. 
7377 void generateInfoForComponentList( 7378 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7379 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7380 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7381 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7382 bool IsFirstComponentList, bool IsImplicit, 7383 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7384 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7385 OverlappedElements = llvm::None) const { 7386 // The following summarizes what has to be generated for each map and the 7387 // types below. The generated information is expressed in this order: 7388 // base pointer, section pointer, size, flags 7389 // (to add to the ones that come from the map type and modifier). 7390 // 7391 // double d; 7392 // int i[100]; 7393 // float *p; 7394 // 7395 // struct S1 { 7396 // int i; 7397 // float f[50]; 7398 // } 7399 // struct S2 { 7400 // int i; 7401 // float f[50]; 7402 // S1 s; 7403 // double *p; 7404 // struct S2 *ps; 7405 // } 7406 // S2 s; 7407 // S2 *ps; 7408 // 7409 // map(d) 7410 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7411 // 7412 // map(i) 7413 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7414 // 7415 // map(i[1:23]) 7416 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7417 // 7418 // map(p) 7419 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7420 // 7421 // map(p[1:24]) 7422 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7423 // in unified shared memory mode or for local pointers 7424 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7425 // 7426 // map(s) 7427 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7428 // 7429 // map(s.i) 7430 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7431 // 7432 // map(s.s.f) 7433 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7434 // 7435 // map(s.p) 7436 // &s, &(s.p), 
sizeof(double*), TARGET_PARAM | TO | FROM 7437 // 7438 // map(to: s.p[:22]) 7439 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7440 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7441 // &(s.p), &(s.p[0]), 22*sizeof(double), 7442 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7443 // (*) alloc space for struct members, only this is a target parameter 7444 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7445 // optimizes this entry out, same in the examples below) 7446 // (***) map the pointee (map: to) 7447 // 7448 // map(s.ps) 7449 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7450 // 7451 // map(from: s.ps->s.i) 7452 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7453 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7454 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7455 // 7456 // map(to: s.ps->ps) 7457 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7458 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7459 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7460 // 7461 // map(s.ps->ps->ps) 7462 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7463 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7464 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7465 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7466 // 7467 // map(to: s.ps->ps->s.f[:22]) 7468 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7469 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7470 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7471 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7472 // 7473 // map(ps) 7474 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7475 // 7476 // map(ps->i) 7477 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7478 // 7479 // map(ps->s.f) 7480 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7481 // 7482 // map(from: ps->p) 7483 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7484 // 7485 // map(to: ps->p[:22]) 7486 // ps, 
&(ps->p), sizeof(double*), TARGET_PARAM 7487 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7488 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7489 // 7490 // map(ps->ps) 7491 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7492 // 7493 // map(from: ps->ps->s.i) 7494 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7495 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7496 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7497 // 7498 // map(from: ps->ps->ps) 7499 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7500 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7501 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7502 // 7503 // map(ps->ps->ps->ps) 7504 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7505 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7506 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7507 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7508 // 7509 // map(to: ps->ps->ps->s.f[:22]) 7510 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7511 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7512 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7513 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7514 // 7515 // map(to: s.f[:22]) map(from: s.p[:33]) 7516 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7517 // sizeof(double*) (**), TARGET_PARAM 7518 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7519 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7520 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7521 // (*) allocate contiguous space needed to fit all mapped members even if 7522 // we allocate space for members not mapped (in this example, 7523 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7524 // them as well because they fall between &s.f[0] and &s.p) 7525 // 7526 // map(from: s.f[:22]) map(to: ps->p[:33]) 7527 // &s, &(s.f[0]), 
22*sizeof(float), TARGET_PARAM | FROM 7528 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7529 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7530 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7531 // (*) the struct this entry pertains to is the 2nd element in the list of 7532 // arguments, hence MEMBER_OF(2) 7533 // 7534 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7535 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7536 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7537 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7538 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7539 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7540 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7541 // (*) the struct this entry pertains to is the 4th element in the list 7542 // of arguments, hence MEMBER_OF(4) 7543 7544 // Track if the map information being generated is the first for a capture. 7545 bool IsCaptureFirstInfo = IsFirstComponentList; 7546 // When the variable is on a declare target link or in a to clause with 7547 // unified memory, a reference is needed to hold the host/device address 7548 // of the variable. 7549 bool RequiresReference = false; 7550 7551 // Scan the components from the base to the complete expression. 7552 auto CI = Components.rbegin(); 7553 auto CE = Components.rend(); 7554 auto I = CI; 7555 7556 // Track if the map information being generated is the first for a list of 7557 // components. 7558 bool IsExpressionFirstInfo = true; 7559 bool FirstPointerInComplexData = false; 7560 Address BP = Address::invalid(); 7561 const Expr *AssocExpr = I->getAssociatedExpression(); 7562 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7563 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7564 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7565 7566 if (isa<MemberExpr>(AssocExpr)) { 7567 // The base is the 'this' pointer. 
The content of the pointer is going 7568 // to be the base of the field being mapped. 7569 BP = CGF.LoadCXXThisAddress(); 7570 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7571 (OASE && 7572 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7573 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7574 } else if (OAShE && 7575 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7576 BP = Address( 7577 CGF.EmitScalarExpr(OAShE->getBase()), 7578 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7579 } else { 7580 // The base is the reference to the variable. 7581 // BP = &Var. 7582 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7583 if (const auto *VD = 7584 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7585 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7586 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7587 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7588 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7589 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7590 RequiresReference = true; 7591 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7592 } 7593 } 7594 } 7595 7596 // If the variable is a pointer and is being dereferenced (i.e. is not 7597 // the last component), the base has to be the pointer itself, not its 7598 // reference. References are ignored for mapping purposes. 7599 QualType Ty = 7600 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7601 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7602 // No need to generate individual map information for the pointer, it 7603 // can be associated with the combined storage if shared memory mode is 7604 // active or the base declaration is not global variable. 
7605 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7606 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7607 !VD || VD->hasLocalStorage()) 7608 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7609 else 7610 FirstPointerInComplexData = true; 7611 ++I; 7612 } 7613 } 7614 7615 // Track whether a component of the list should be marked as MEMBER_OF some 7616 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7617 // in a component list should be marked as MEMBER_OF, all subsequent entries 7618 // do not belong to the base struct. E.g. 7619 // struct S2 s; 7620 // s.ps->ps->ps->f[:] 7621 // (1) (2) (3) (4) 7622 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7623 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7624 // is the pointee of ps(2) which is not member of struct s, so it should not 7625 // be marked as such (it is still PTR_AND_OBJ). 7626 // The variable is initialized to false so that PTR_AND_OBJ entries which 7627 // are not struct members are not considered (e.g. array of pointers to 7628 // data). 7629 bool ShouldBeMemberOf = false; 7630 7631 // Variable keeping track of whether or not we have encountered a component 7632 // in the component list which is a member expression. Useful when we have a 7633 // pointer or a final array section, in which case it is the previous 7634 // component in the list which tells us whether we have a member expression. 7635 // E.g. X.f[:] 7636 // While processing the final array section "[:]" it is "f" which tells us 7637 // whether we are dealing with a member of a declared struct. 7638 const MemberExpr *EncounteredME = nullptr; 7639 7640 for (; I != CE; ++I) { 7641 // If the current component is member of a struct (parent struct) mark it. 
7642 if (!EncounteredME) { 7643 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7644 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7645 // as MEMBER_OF the parent struct. 7646 if (EncounteredME) { 7647 ShouldBeMemberOf = true; 7648 // Do not emit as complex pointer if this is actually not array-like 7649 // expression. 7650 if (FirstPointerInComplexData) { 7651 QualType Ty = std::prev(I) 7652 ->getAssociatedDeclaration() 7653 ->getType() 7654 .getNonReferenceType(); 7655 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7656 FirstPointerInComplexData = false; 7657 } 7658 } 7659 } 7660 7661 auto Next = std::next(I); 7662 7663 // We need to generate the addresses and sizes if this is the last 7664 // component, if the component is a pointer or if it is an array section 7665 // whose length can't be proved to be one. If this is a pointer, it 7666 // becomes the base address for the following components. 7667 7668 // A final array section, is one whose length can't be proved to be one. 7669 bool IsFinalArraySection = 7670 isFinalArraySectionExpression(I->getAssociatedExpression()); 7671 7672 // Get information on whether the element is a pointer. Have to do a 7673 // special treatment for array sections given that they are built-in 7674 // types. 
7675 const auto *OASE = 7676 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7677 const auto *OAShE = 7678 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7679 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7680 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7681 bool IsPointer = 7682 OAShE || 7683 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7684 .getCanonicalType() 7685 ->isAnyPointerType()) || 7686 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7687 bool IsNonDerefPointer = IsPointer && !UO && !BO; 7688 7689 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { 7690 // If this is not the last component, we expect the pointer to be 7691 // associated with an array expression or member expression. 7692 assert((Next == CE || 7693 isa<MemberExpr>(Next->getAssociatedExpression()) || 7694 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7695 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7696 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7697 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7698 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7699 "Unexpected expression"); 7700 7701 Address LB = Address::invalid(); 7702 if (OAShE) { 7703 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7704 CGF.getContext().getTypeAlignInChars( 7705 OAShE->getBase()->getType())); 7706 } else { 7707 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7708 .getAddress(CGF); 7709 } 7710 7711 // If this component is a pointer inside the base struct then we don't 7712 // need to create any entry for it - it will be combined with the object 7713 // it is pointing to into a single PTR_AND_OBJ entry. 
7714 bool IsMemberPointerOrAddr = 7715 (IsPointer || ForDeviceAddr) && EncounteredME && 7716 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7717 EncounteredME); 7718 if (!OverlappedElements.empty()) { 7719 // Handle base element with the info for overlapped elements. 7720 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7721 assert(Next == CE && 7722 "Expected last element for the overlapped elements."); 7723 assert(!IsPointer && 7724 "Unexpected base element with the pointer type."); 7725 // Mark the whole struct as the struct that requires allocation on the 7726 // device. 7727 PartialStruct.LowestElem = {0, LB}; 7728 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7729 I->getAssociatedExpression()->getType()); 7730 Address HB = CGF.Builder.CreateConstGEP( 7731 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7732 CGF.VoidPtrTy), 7733 TypeSize.getQuantity() - 1); 7734 PartialStruct.HighestElem = { 7735 std::numeric_limits<decltype( 7736 PartialStruct.HighestElem.first)>::max(), 7737 HB}; 7738 PartialStruct.Base = BP; 7739 // Emit data for non-overlapped data. 7740 OpenMPOffloadMappingFlags Flags = 7741 OMP_MAP_MEMBER_OF | 7742 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7743 /*AddPtrFlag=*/false, 7744 /*AddIsTargetParamFlag=*/false); 7745 LB = BP; 7746 llvm::Value *Size = nullptr; 7747 // Do bitcopy of all non-overlapped structure elements. 
7748 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7749 Component : OverlappedElements) { 7750 Address ComponentLB = Address::invalid(); 7751 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7752 Component) { 7753 if (MC.getAssociatedDeclaration()) { 7754 ComponentLB = 7755 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7756 .getAddress(CGF); 7757 Size = CGF.Builder.CreatePtrDiff( 7758 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7759 CGF.EmitCastToVoidPtr(LB.getPointer())); 7760 break; 7761 } 7762 } 7763 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7764 CombinedInfo.Pointers.push_back(LB.getPointer()); 7765 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7766 Size, CGF.Int64Ty, /*isSigned=*/true)); 7767 CombinedInfo.Types.push_back(Flags); 7768 CombinedInfo.Mappers.push_back(nullptr); 7769 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7770 } 7771 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7772 CombinedInfo.Pointers.push_back(LB.getPointer()); 7773 Size = CGF.Builder.CreatePtrDiff( 7774 CGF.EmitCastToVoidPtr( 7775 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7776 CGF.EmitCastToVoidPtr(LB.getPointer())); 7777 CombinedInfo.Sizes.push_back( 7778 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7779 CombinedInfo.Types.push_back(Flags); 7780 CombinedInfo.Mappers.push_back(nullptr); 7781 break; 7782 } 7783 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7784 if (!IsMemberPointerOrAddr) { 7785 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7786 CombinedInfo.Pointers.push_back(LB.getPointer()); 7787 CombinedInfo.Sizes.push_back( 7788 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7789 7790 // If Mapper is valid, the last component inherits the mapper. 7791 bool HasMapper = Mapper && Next == CE; 7792 CombinedInfo.Mappers.push_back(HasMapper ? 
Mapper : nullptr); 7793 7794 // We need to add a pointer flag for each map that comes from the 7795 // same expression except for the first one. We also need to signal 7796 // this map is the first one that relates with the current capture 7797 // (there is a set of entries for each capture). 7798 OpenMPOffloadMappingFlags Flags = 7799 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7800 !IsExpressionFirstInfo || RequiresReference || 7801 FirstPointerInComplexData, 7802 IsCaptureFirstInfo && !RequiresReference); 7803 7804 if (!IsExpressionFirstInfo) { 7805 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7806 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7807 if (IsPointer) 7808 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7809 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7810 7811 if (ShouldBeMemberOf) { 7812 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7813 // should be later updated with the correct value of MEMBER_OF. 7814 Flags |= OMP_MAP_MEMBER_OF; 7815 // From now on, all subsequent PTR_AND_OBJ entries should not be 7816 // marked as MEMBER_OF. 7817 ShouldBeMemberOf = false; 7818 } 7819 } 7820 7821 CombinedInfo.Types.push_back(Flags); 7822 } 7823 7824 // If we have encountered a member expression so far, keep track of the 7825 // mapped member. If the parent is "*this", then the value declaration 7826 // is nullptr. 
7827 if (EncounteredME) { 7828 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7829 unsigned FieldIndex = FD->getFieldIndex(); 7830 7831 // Update info about the lowest and highest elements for this struct 7832 if (!PartialStruct.Base.isValid()) { 7833 PartialStruct.LowestElem = {FieldIndex, LB}; 7834 if (IsFinalArraySection) { 7835 Address HB = 7836 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7837 .getAddress(CGF); 7838 PartialStruct.HighestElem = {FieldIndex, HB}; 7839 } else { 7840 PartialStruct.HighestElem = {FieldIndex, LB}; 7841 } 7842 PartialStruct.Base = BP; 7843 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7844 PartialStruct.LowestElem = {FieldIndex, LB}; 7845 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7846 PartialStruct.HighestElem = {FieldIndex, LB}; 7847 } 7848 } 7849 7850 // If we have a final array section, we are done with this expression. 7851 if (IsFinalArraySection) 7852 break; 7853 7854 // The pointer becomes the base for the next element. 7855 if (Next != CE) 7856 BP = LB; 7857 7858 IsExpressionFirstInfo = false; 7859 IsCaptureFirstInfo = false; 7860 FirstPointerInComplexData = false; 7861 } 7862 } 7863 } 7864 7865 /// Return the adjusted map modifiers if the declaration a capture refers to 7866 /// appears in a first-private clause. This is expected to be used only with 7867 /// directives that start with 'target'. 7868 MappableExprsHandler::OpenMPOffloadMappingFlags 7869 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7870 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7871 7872 // A first private variable captured by reference will use only the 7873 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7874 // declaration is known as first-private in this handler. 
7875 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7876 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7877 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7878 return MappableExprsHandler::OMP_MAP_ALWAYS | 7879 MappableExprsHandler::OMP_MAP_TO; 7880 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7881 return MappableExprsHandler::OMP_MAP_TO | 7882 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7883 return MappableExprsHandler::OMP_MAP_PRIVATE | 7884 MappableExprsHandler::OMP_MAP_TO; 7885 } 7886 return MappableExprsHandler::OMP_MAP_TO | 7887 MappableExprsHandler::OMP_MAP_FROM; 7888 } 7889 7890 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7891 // Rotate by getFlagMemberOffset() bits. 7892 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7893 << getFlagMemberOffset()); 7894 } 7895 7896 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7897 OpenMPOffloadMappingFlags MemberOfFlag) { 7898 // If the entry is PTR_AND_OBJ but has not been marked with the special 7899 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7900 // marked as MEMBER_OF. 7901 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7902 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7903 return; 7904 7905 // Reset the placeholder value to prepare the flag for the assignment of the 7906 // proper MEMBER_OF value. 7907 Flags &= ~OMP_MAP_MEMBER_OF; 7908 Flags |= MemberOfFlag; 7909 } 7910 7911 void getPlainLayout(const CXXRecordDecl *RD, 7912 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7913 bool AsBase) const { 7914 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7915 7916 llvm::StructType *St = 7917 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7918 7919 unsigned NumElements = St->getNumElements(); 7920 llvm::SmallVector< 7921 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7922 RecordLayout(NumElements); 7923 7924 // Fill bases. 
for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  /// Constructor for an executable directive.
  ///
  /// Besides remembering \p Dir and \p CGF, this pre-computes two lookup
  /// tables from the clauses of \p Dir:
  ///  - FirstPrivateDecls: every variable listed in a 'firstprivate' clause
  ///    (with the clause's implicit flag), plus the allocator-traits variable
  ///    (or, failing that, the allocator variable) of each 'uses_allocators'
  ///    item, which are always recorded as implicit.
  ///  - DevPointersMap: the component lists of every 'is_device_ptr' clause,
  ///    keyed by the declaration they refer to.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    // try_emplace keeps the first entry, so an earlier clause wins if the same
    // variable shows up more than once.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        // Prefer the traits variable when the item has one that is a plain
        // declaration reference; otherwise fall back to the allocator itself
        // if it refers to a variable.
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
  }

  /// Constructor for the declare mapper directive.
  /// No clause information is pre-computed in this case.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Receives exactly one new entry (base pointer,
  ///        pointer, size, type, null mapper) describing the mapped range.
  /// \param CurTypes Map flags of the member entries; updated in place (the
  ///        first one loses TARGET_PARAM and every one is passed through
  ///        setCorrectMemberOfFlag). NOTE(review): the first element is
  ///        accessed unconditionally, so this presumably must be non-empty.
  /// \param PartialStruct Supplies the struct base and the lowest/highest
  ///        mapped elements that delimit the range.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    // Step one element past the highest member, then cast both ends to void*
    // before taking the pointer difference.
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM
    CombinedInfo.Types.push_back(OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    // The member-of flag is built from the position of the entry that was just
    // appended to CombinedInfo.
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // Outline of the steps below:
    //  1. Bucket the component lists of all map/to/from clauses by canonical
    //     declaration (a null key stands for 'this'), ignoring declarations
    //     that appear in SkipVarSet.
    //  2. For every use_device_ptr / use_device_addr list item, either flag an
    //     already collected entry with ReturnDevicePointer or create a
    //     zero-size entry for it (deferred when it is a struct member).
    //  3. Emit the entries declaration by declaration, adding a combined
    //     entry whenever only part of a struct was mapped.

    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            bool ForDeviceAddr = false) {
          // Key on the canonical declaration so redeclarations share a bucket.
          const ValueDecl *VD =
              D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
          if (SkipVarSet.count(VD))
            return;
          Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                                ReturnDevicePointer, IsImplicit, Mapper,
                                ForDeviceAddr);
        };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // 'map' clauses carry their own map type and map-type modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L));
      }
    // 'to' clauses are recorded as map type 'to' with their motion modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }
    // 'from' clauses are recorded as map type 'from' with their motion
    // modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L));
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    // Stand-alone use_device_ptr entries are collected here and appended to
    // CombinedInfo only at the very end of this function.
    MapCombinedInfoTy UseDevicePtrCombinedInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          // Exclude cases where the base pointer is mapped as array subscript,
          // array section or array shaping. The base address is passed as a
          // pointer to base in this case and cannot be used as a base for
          // use_device_ptr list item.
          if (CI != It->second.end()) {
            // PrevCI is the component that follows the base in the list (the
            // list is walked from its back, where the base lives).
            auto PrevCI = std::next(CI->Components.rbegin());
            const auto *VarD = dyn_cast<VarDecl>(VD);
            if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                isa<MemberExpr>(IE) ||
                !VD->getType().getNonReferenceType()->isPointerType() ||
                PrevCI == CI->Components.rend() ||
                isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                VarD->hasLocalStorage()) {
              CI->ReturnDevicePointer = true;
              continue;
            }
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
        } else {
          // The loaded pointer value serves as both base pointer and pointer
          // of a zero-size RETURN_PARAM | TARGET_PARAM entry.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
          UseDevicePtrCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM |
                                                   OMP_MAP_TARGET_PARAM);
          UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    // Look at the use_device_addr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_addr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    // Each declaration is handled only once, even if it is listed repeatedly.
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                  llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                  nullptr, /*ForDeviceAddr=*/true);
          DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
        } else {
          // Unlike use_device_ptr, the address of the item itself is used
          // (or the scalar value when the expression is not a glvalue), and
          // the entry is appended to CombinedInfo right away.
          llvm::Value *Ptr;
          if (IE->isGLValue())
            Ptr = CGF.EmitLValue(IE).getPointer(CGF);
          else
            Ptr = CGF.EmitScalarExpr(IE);
          CombinedInfo.BasePointers.emplace_back(Ptr, VD);
          CombinedInfo.Pointers.push_back(Ptr);
          CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
          CombinedInfo.Mappers.push_back(nullptr);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers,
                                     L.MotionModifiers, L.Components, CurInfo,
                                     PartialStruct, IsFirstComponentList,
                                     L.IsImplicit, L.Mapper, L.ForDeviceAddr);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          // Only the first entry produced for this component list is tagged.
          CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
              RelevantVD);
          CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            // use_device_addr: base pointer and pointer are the same value.
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            // use_device_ptr: the base is the address of the member and the
            // pointer is the value loaded from it.
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
            // value MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      // The combined entry goes into CombinedInfo first, so it precedes the
      // member entries appended from CurInfo just below.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDevicePtrCombinedInfo);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  ///
  /// Only valid when this handler was built from a declare mapper directive
  /// (asserted below). Every clause of the mapper is cast to a map clause.
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Fill the information map for map clauses.
    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      for (const auto L : MC->component_lists()) {
        // Key on the canonical declaration; a null declaration keeps a null
        // key.
        const ValueDecl *VD =
            std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
                           : nullptr;
        // Get the corresponding user-defined mapper.
        Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
                              MC->getMapTypeModifiers(), llvm::None,
                              /*ReturnDevicePointer=*/false, MC->isImplicit(),
                              std::get<2>(L));
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(L.MapType, L.MapModifiers,
                                     L.MotionModifiers, L.Components, CurInfo,
                                     PartialStruct, IsFirstComponentList,
                                     L.IsImplicit, L.Mapper, L.ForDeviceAddr);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
8389 void generateInfoForLambdaCaptures( 8390 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8391 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8392 const auto *RD = VD->getType() 8393 .getCanonicalType() 8394 .getNonReferenceType() 8395 ->getAsCXXRecordDecl(); 8396 if (!RD || !RD->isLambda()) 8397 return; 8398 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8399 LValue VDLVal = CGF.MakeAddrLValue( 8400 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8401 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8402 FieldDecl *ThisCapture = nullptr; 8403 RD->getCaptureFields(Captures, ThisCapture); 8404 if (ThisCapture) { 8405 LValue ThisLVal = 8406 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8407 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8408 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8409 VDLVal.getPointer(CGF)); 8410 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); 8411 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); 8412 CombinedInfo.Sizes.push_back( 8413 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8414 CGF.Int64Ty, /*isSigned=*/true)); 8415 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8416 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8417 CombinedInfo.Mappers.push_back(nullptr); 8418 } 8419 for (const LambdaCapture &LC : RD->captures()) { 8420 if (!LC.capturesVariable()) 8421 continue; 8422 const VarDecl *VD = LC.getCapturedVar(); 8423 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8424 continue; 8425 auto It = Captures.find(VD); 8426 assert(It != Captures.end() && "Found lambda capture without field."); 8427 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8428 if (LC.getCaptureKind() == LCK_ByRef) { 8429 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8430 
LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8431 VDLVal.getPointer(CGF)); 8432 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8433 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); 8434 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8435 CGF.getTypeSize( 8436 VD->getType().getCanonicalType().getNonReferenceType()), 8437 CGF.Int64Ty, /*isSigned=*/true)); 8438 } else { 8439 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8440 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8441 VDLVal.getPointer(CGF)); 8442 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); 8443 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); 8444 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8445 } 8446 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8447 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8448 CombinedInfo.Mappers.push_back(nullptr); 8449 } 8450 } 8451 8452 /// Set correct indices for lambdas captures. 8453 void adjustMemberOfForLambdaCaptures( 8454 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8455 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8456 MapFlagsArrayTy &Types) const { 8457 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8458 // Set correct member_of idx for all implicit lambda captures. 
8459 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8460 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8461 continue; 8462 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8463 assert(BasePtr && "Unable to find base lambda address."); 8464 int TgtIdx = -1; 8465 for (unsigned J = I; J > 0; --J) { 8466 unsigned Idx = J - 1; 8467 if (Pointers[Idx] != BasePtr) 8468 continue; 8469 TgtIdx = Idx; 8470 break; 8471 } 8472 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8473 // All other current entries will be MEMBER_OF the combined entry 8474 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8475 // 0xFFFF in the MEMBER_OF field). 8476 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8477 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8478 } 8479 } 8480 8481 /// Generate the base pointers, section pointers, sizes, map types, and 8482 /// mappers associated to a given capture (all included in \a CombinedInfo). 8483 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8484 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, 8485 StructRangeInfoTy &PartialStruct) const { 8486 assert(!Cap->capturesVariableArrayType() && 8487 "Not expecting to generate map info for a variable array type!"); 8488 8489 // We need to know when we generating information for the first component 8490 const ValueDecl *VD = Cap->capturesThis() 8491 ? nullptr 8492 : Cap->getCapturedVar()->getCanonicalDecl(); 8493 8494 // If this declaration appears in a is_device_ptr clause we just have to 8495 // pass the pointer by value. If it is a reference to a declaration, we just 8496 // pass its value. 
8497 if (DevPointersMap.count(VD)) { 8498 CombinedInfo.BasePointers.emplace_back(Arg, VD); 8499 CombinedInfo.Pointers.push_back(Arg); 8500 CombinedInfo.Sizes.push_back( 8501 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8502 CGF.Int64Ty, /*isSigned=*/true)); 8503 CombinedInfo.Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8504 CombinedInfo.Mappers.push_back(nullptr); 8505 return; 8506 } 8507 8508 using MapData = 8509 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8510 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 8511 const ValueDecl *>; 8512 SmallVector<MapData, 4> DeclComponentLists; 8513 assert(CurDir.is<const OMPExecutableDirective *>() && 8514 "Expect a executable directive"); 8515 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8516 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8517 for (const auto L : C->decl_component_lists(VD)) { 8518 const ValueDecl *VDecl, *Mapper; 8519 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8520 std::tie(VDecl, Components, Mapper) = L; 8521 assert(VDecl == VD && "We got information for the wrong declaration??"); 8522 assert(!Components.empty() && 8523 "Not expecting declaration with no component lists."); 8524 DeclComponentLists.emplace_back(Components, C->getMapType(), 8525 C->getMapTypeModifiers(), 8526 C->isImplicit(), Mapper); 8527 } 8528 } 8529 8530 // Find overlapping elements (including the offset from the base element). 
8531 llvm::SmallDenseMap< 8532 const MapData *, 8533 llvm::SmallVector< 8534 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8535 4> 8536 OverlappedData; 8537 size_t Count = 0; 8538 for (const MapData &L : DeclComponentLists) { 8539 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8540 OpenMPMapClauseKind MapType; 8541 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8542 bool IsImplicit; 8543 const ValueDecl *Mapper; 8544 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L; 8545 ++Count; 8546 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8547 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8548 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper) = L1; 8549 auto CI = Components.rbegin(); 8550 auto CE = Components.rend(); 8551 auto SI = Components1.rbegin(); 8552 auto SE = Components1.rend(); 8553 for (; CI != CE && SI != SE; ++CI, ++SI) { 8554 if (CI->getAssociatedExpression()->getStmtClass() != 8555 SI->getAssociatedExpression()->getStmtClass()) 8556 break; 8557 // Are we dealing with different variables/fields? 8558 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8559 break; 8560 } 8561 // Found overlapping if, at least for one component, reached the head of 8562 // the components list. 8563 if (CI == CE || SI == SE) { 8564 assert((CI != CE || SI != SE) && 8565 "Unexpected full match of the mapping components."); 8566 const MapData &BaseData = CI == CE ? L : L1; 8567 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8568 SI == SE ? Components : Components1; 8569 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8570 OverlappedElements.getSecond().push_back(SubData); 8571 } 8572 } 8573 } 8574 // Sort the overlapped elements for each item. 
8575 llvm::SmallVector<const FieldDecl *, 4> Layout; 8576 if (!OverlappedData.empty()) { 8577 if (const auto *CRD = 8578 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8579 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8580 else { 8581 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8582 Layout.append(RD->field_begin(), RD->field_end()); 8583 } 8584 } 8585 for (auto &Pair : OverlappedData) { 8586 llvm::sort( 8587 Pair.getSecond(), 8588 [&Layout]( 8589 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8590 OMPClauseMappableExprCommon::MappableExprComponentListRef 8591 Second) { 8592 auto CI = First.rbegin(); 8593 auto CE = First.rend(); 8594 auto SI = Second.rbegin(); 8595 auto SE = Second.rend(); 8596 for (; CI != CE && SI != SE; ++CI, ++SI) { 8597 if (CI->getAssociatedExpression()->getStmtClass() != 8598 SI->getAssociatedExpression()->getStmtClass()) 8599 break; 8600 // Are we dealing with different variables/fields? 8601 if (CI->getAssociatedDeclaration() != 8602 SI->getAssociatedDeclaration()) 8603 break; 8604 } 8605 8606 // Lists contain the same elements. 8607 if (CI == CE && SI == SE) 8608 return false; 8609 8610 // List with less elements is less than list with more elements. 8611 if (CI == CE || SI == SE) 8612 return CI == CE; 8613 8614 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8615 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8616 if (FD1->getParent() == FD2->getParent()) 8617 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8618 const auto It = 8619 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8620 return FD == FD1 || FD == FD2; 8621 }); 8622 return *It == FD1; 8623 }); 8624 } 8625 8626 // Associated with a capture, because the mapping flags depend on it. 8627 // Go through all of the elements with the overlapped elements. 
8628 for (const auto &Pair : OverlappedData) { 8629 const MapData &L = *Pair.getFirst(); 8630 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8631 OpenMPMapClauseKind MapType; 8632 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8633 bool IsImplicit; 8634 const ValueDecl *Mapper; 8635 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L; 8636 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8637 OverlappedComponents = Pair.getSecond(); 8638 bool IsFirstComponentList = true; 8639 generateInfoForComponentList( 8640 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 8641 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 8642 /*ForDeviceAddr=*/false, OverlappedComponents); 8643 } 8644 // Go through other elements without overlapped elements. 8645 bool IsFirstComponentList = OverlappedData.empty(); 8646 for (const MapData &L : DeclComponentLists) { 8647 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8648 OpenMPMapClauseKind MapType; 8649 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8650 bool IsImplicit; 8651 const ValueDecl *Mapper; 8652 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper) = L; 8653 auto It = OverlappedData.find(&L); 8654 if (It == OverlappedData.end()) 8655 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 8656 Components, CombinedInfo, PartialStruct, 8657 IsFirstComponentList, IsImplicit, Mapper); 8658 IsFirstComponentList = false; 8659 } 8660 } 8661 8662 /// Generate the default map information for a given capture \a CI, 8663 /// record field declaration \a RI and captured value \a CV. 8664 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8665 const FieldDecl &RI, llvm::Value *CV, 8666 MapCombinedInfoTy &CombinedInfo) const { 8667 bool IsImplicit = true; 8668 // Do the default mapping. 
8669 if (CI.capturesThis()) { 8670 CombinedInfo.BasePointers.push_back(CV); 8671 CombinedInfo.Pointers.push_back(CV); 8672 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8673 CombinedInfo.Sizes.push_back( 8674 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8675 CGF.Int64Ty, /*isSigned=*/true)); 8676 // Default map type. 8677 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8678 } else if (CI.capturesVariableByCopy()) { 8679 CombinedInfo.BasePointers.push_back(CV); 8680 CombinedInfo.Pointers.push_back(CV); 8681 if (!RI.getType()->isAnyPointerType()) { 8682 // We have to signal to the runtime captures passed by value that are 8683 // not pointers. 8684 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 8685 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8686 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8687 } else { 8688 // Pointers are implicitly mapped with a zero size and no flags 8689 // (other than first map that is added for all implicit maps). 8690 CombinedInfo.Types.push_back(OMP_MAP_NONE); 8691 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8692 } 8693 const VarDecl *VD = CI.getCapturedVar(); 8694 auto I = FirstPrivateDecls.find(VD); 8695 if (I != FirstPrivateDecls.end()) 8696 IsImplicit = I->getSecond(); 8697 } else { 8698 assert(CI.capturesVariable() && "Expected captured reference."); 8699 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8700 QualType ElementType = PtrTy->getPointeeType(); 8701 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8702 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8703 // The default map type for a scalar/complex type is 'to' because by 8704 // default the value doesn't have to be retrieved. For an aggregate 8705 // type, the default is 'tofrom'. 
8706 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 8707 const VarDecl *VD = CI.getCapturedVar(); 8708 auto I = FirstPrivateDecls.find(VD); 8709 if (I != FirstPrivateDecls.end() && 8710 VD->getType().isConstant(CGF.getContext())) { 8711 llvm::Constant *Addr = 8712 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8713 // Copy the value of the original variable to the new global copy. 8714 CGF.Builder.CreateMemCpy( 8715 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8716 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8717 CombinedInfo.Sizes.back(), /*IsVolatile=*/false); 8718 // Use new global variable as the base pointers. 8719 CombinedInfo.BasePointers.push_back(Addr); 8720 CombinedInfo.Pointers.push_back(Addr); 8721 } else { 8722 CombinedInfo.BasePointers.push_back(CV); 8723 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8724 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8725 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8726 AlignmentSource::Decl)); 8727 CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); 8728 } else { 8729 CombinedInfo.Pointers.push_back(CV); 8730 } 8731 } 8732 if (I != FirstPrivateDecls.end()) 8733 IsImplicit = I->getSecond(); 8734 } 8735 // Every default map produces a single argument which is a target parameter. 8736 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; 8737 8738 // Add flag stating this is an implicit map. 8739 if (IsImplicit) 8740 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; 8741 8742 // No user-defined mapper for default mapping. 8743 CombinedInfo.Mappers.push_back(nullptr); 8744 } 8745 }; 8746 } // anonymous namespace 8747 8748 /// Emit the arrays used to pass the captures and map information to the 8749 /// offloading runtime library. If there is no map or capture information, 8750 /// return nullptr by reference. 
8751 static void 8752 emitOffloadingArrays(CodeGenFunction &CGF, 8753 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 8754 CGOpenMPRuntime::TargetDataInfo &Info) { 8755 CodeGenModule &CGM = CGF.CGM; 8756 ASTContext &Ctx = CGF.getContext(); 8757 8758 // Reset the array information. 8759 Info.clearArrayInfo(); 8760 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 8761 8762 if (Info.NumberOfPtrs) { 8763 // Detect if we have any capture size requiring runtime evaluation of the 8764 // size so that a constant array could be eventually used. 8765 bool hasRuntimeEvaluationCaptureSize = false; 8766 for (llvm::Value *S : CombinedInfo.Sizes) 8767 if (!isa<llvm::Constant>(S)) { 8768 hasRuntimeEvaluationCaptureSize = true; 8769 break; 8770 } 8771 8772 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8773 QualType PointerArrayType = Ctx.getConstantArrayType( 8774 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8775 /*IndexTypeQuals=*/0); 8776 8777 Info.BasePointersArray = 8778 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8779 Info.PointersArray = 8780 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8781 Address MappersArray = 8782 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 8783 Info.MappersArray = MappersArray.getPointer(); 8784 8785 // If we don't have any VLA types or other types that require runtime 8786 // evaluation, we can use a constant array for the map sizes, otherwise we 8787 // need to fill up the arrays as we do for the pointers. 
8788 QualType Int64Ty = 8789 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8790 if (hasRuntimeEvaluationCaptureSize) { 8791 QualType SizeArrayType = Ctx.getConstantArrayType( 8792 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8793 /*IndexTypeQuals=*/0); 8794 Info.SizesArray = 8795 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8796 } else { 8797 // We expect all the sizes to be constant, so we collect them to create 8798 // a constant array. 8799 SmallVector<llvm::Constant *, 16> ConstSizes; 8800 for (llvm::Value *S : CombinedInfo.Sizes) 8801 ConstSizes.push_back(cast<llvm::Constant>(S)); 8802 8803 auto *SizesArrayInit = llvm::ConstantArray::get( 8804 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8805 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8806 auto *SizesArrayGbl = new llvm::GlobalVariable( 8807 CGM.getModule(), SizesArrayInit->getType(), 8808 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8809 SizesArrayInit, Name); 8810 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8811 Info.SizesArray = SizesArrayGbl; 8812 } 8813 8814 // The map types are always constant so we don't need to generate code to 8815 // fill arrays. Instead, we create an array constant. 
8816 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 8817 llvm::copy(CombinedInfo.Types, Mapping.begin()); 8818 llvm::Constant *MapTypesArrayInit = 8819 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8820 std::string MaptypesName = 8821 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8822 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8823 CGM.getModule(), MapTypesArrayInit->getType(), 8824 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8825 MapTypesArrayInit, MaptypesName); 8826 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8827 Info.MapTypesArray = MapTypesArrayGbl; 8828 8829 // If there's a present map type modifier, it must not be applied to the end 8830 // of a region, so generate a separate map type array in that case. 8831 if (Info.separateBeginEndCalls()) { 8832 bool EndMapTypesDiffer = false; 8833 for (uint64_t &Type : Mapping) { 8834 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 8835 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 8836 EndMapTypesDiffer = true; 8837 } 8838 } 8839 if (EndMapTypesDiffer) { 8840 MapTypesArrayInit = 8841 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8842 MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8843 MapTypesArrayGbl = new llvm::GlobalVariable( 8844 CGM.getModule(), MapTypesArrayInit->getType(), 8845 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8846 MapTypesArrayInit, MaptypesName); 8847 MapTypesArrayGbl->setUnnamedAddr( 8848 llvm::GlobalValue::UnnamedAddr::Global); 8849 Info.MapTypesArrayEnd = MapTypesArrayGbl; 8850 } 8851 } 8852 8853 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8854 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 8855 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8856 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8857 Info.BasePointersArray, 0, I); 8858 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8859 BP, 
BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8860 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8861 CGF.Builder.CreateStore(BPVal, BPAddr); 8862 8863 if (Info.requiresDevicePointerInfo()) 8864 if (const ValueDecl *DevVD = 8865 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 8866 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8867 8868 llvm::Value *PVal = CombinedInfo.Pointers[I]; 8869 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8870 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8871 Info.PointersArray, 0, I); 8872 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8873 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8874 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8875 CGF.Builder.CreateStore(PVal, PAddr); 8876 8877 if (hasRuntimeEvaluationCaptureSize) { 8878 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8879 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8880 Info.SizesArray, 8881 /*Idx0=*/0, 8882 /*Idx1=*/I); 8883 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 8884 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 8885 CGM.Int64Ty, 8886 /*isSigned=*/true), 8887 SAddr); 8888 } 8889 8890 // Fill up the mapper array. 8891 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 8892 if (CombinedInfo.Mappers[I]) { 8893 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 8894 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 8895 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 8896 Info.HasMapper = true; 8897 } 8898 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 8899 CGF.Builder.CreateStore(MFunc, MAddr); 8900 } 8901 } 8902 } 8903 8904 /// Emit the arguments to be passed to the runtime library based on the 8905 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 8906 /// ForEndCall, emit map types to be passed for the end of the region instead of 8907 /// the beginning. 
8908 static void emitOffloadingArraysArgument( 8909 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8910 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8911 llvm::Value *&MapTypesArrayArg, llvm::Value *&MappersArrayArg, 8912 CGOpenMPRuntime::TargetDataInfo &Info, bool ForEndCall = false) { 8913 assert((!ForEndCall || Info.separateBeginEndCalls()) && 8914 "expected region end call to runtime only when end call is separate"); 8915 CodeGenModule &CGM = CGF.CGM; 8916 if (Info.NumberOfPtrs) { 8917 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8918 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8919 Info.BasePointersArray, 8920 /*Idx0=*/0, /*Idx1=*/0); 8921 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8922 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8923 Info.PointersArray, 8924 /*Idx0=*/0, 8925 /*Idx1=*/0); 8926 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8927 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8928 /*Idx0=*/0, /*Idx1=*/0); 8929 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8930 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8931 ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 8932 : Info.MapTypesArray, 8933 /*Idx0=*/0, 8934 /*Idx1=*/0); 8935 MappersArrayArg = 8936 Info.HasMapper 8937 ? CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy) 8938 : llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8939 } else { 8940 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8941 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8942 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8943 MapTypesArrayArg = 8944 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8945 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8946 } 8947 } 8948 8949 /// Check for inner distribute directive. 
8950 static const OMPExecutableDirective * 8951 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8952 const auto *CS = D.getInnermostCapturedStmt(); 8953 const auto *Body = 8954 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8955 const Stmt *ChildStmt = 8956 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8957 8958 if (const auto *NestedDir = 8959 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8960 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8961 switch (D.getDirectiveKind()) { 8962 case OMPD_target: 8963 if (isOpenMPDistributeDirective(DKind)) 8964 return NestedDir; 8965 if (DKind == OMPD_teams) { 8966 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8967 /*IgnoreCaptured=*/true); 8968 if (!Body) 8969 return nullptr; 8970 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8971 if (const auto *NND = 8972 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8973 DKind = NND->getDirectiveKind(); 8974 if (isOpenMPDistributeDirective(DKind)) 8975 return NND; 8976 } 8977 } 8978 return nullptr; 8979 case OMPD_target_teams: 8980 if (isOpenMPDistributeDirective(DKind)) 8981 return NestedDir; 8982 return nullptr; 8983 case OMPD_target_parallel: 8984 case OMPD_target_simd: 8985 case OMPD_target_parallel_for: 8986 case OMPD_target_parallel_for_simd: 8987 return nullptr; 8988 case OMPD_target_teams_distribute: 8989 case OMPD_target_teams_distribute_simd: 8990 case OMPD_target_teams_distribute_parallel_for: 8991 case OMPD_target_teams_distribute_parallel_for_simd: 8992 case OMPD_parallel: 8993 case OMPD_for: 8994 case OMPD_parallel_for: 8995 case OMPD_parallel_master: 8996 case OMPD_parallel_sections: 8997 case OMPD_for_simd: 8998 case OMPD_parallel_for_simd: 8999 case OMPD_cancel: 9000 case OMPD_cancellation_point: 9001 case OMPD_ordered: 9002 case OMPD_threadprivate: 9003 case OMPD_allocate: 9004 case OMPD_task: 9005 case OMPD_simd: 9006 case OMPD_sections: 9007 
case OMPD_section: 9008 case OMPD_single: 9009 case OMPD_master: 9010 case OMPD_critical: 9011 case OMPD_taskyield: 9012 case OMPD_barrier: 9013 case OMPD_taskwait: 9014 case OMPD_taskgroup: 9015 case OMPD_atomic: 9016 case OMPD_flush: 9017 case OMPD_depobj: 9018 case OMPD_scan: 9019 case OMPD_teams: 9020 case OMPD_target_data: 9021 case OMPD_target_exit_data: 9022 case OMPD_target_enter_data: 9023 case OMPD_distribute: 9024 case OMPD_distribute_simd: 9025 case OMPD_distribute_parallel_for: 9026 case OMPD_distribute_parallel_for_simd: 9027 case OMPD_teams_distribute: 9028 case OMPD_teams_distribute_simd: 9029 case OMPD_teams_distribute_parallel_for: 9030 case OMPD_teams_distribute_parallel_for_simd: 9031 case OMPD_target_update: 9032 case OMPD_declare_simd: 9033 case OMPD_declare_variant: 9034 case OMPD_begin_declare_variant: 9035 case OMPD_end_declare_variant: 9036 case OMPD_declare_target: 9037 case OMPD_end_declare_target: 9038 case OMPD_declare_reduction: 9039 case OMPD_declare_mapper: 9040 case OMPD_taskloop: 9041 case OMPD_taskloop_simd: 9042 case OMPD_master_taskloop: 9043 case OMPD_master_taskloop_simd: 9044 case OMPD_parallel_master_taskloop: 9045 case OMPD_parallel_master_taskloop_simd: 9046 case OMPD_requires: 9047 case OMPD_unknown: 9048 default: 9049 llvm_unreachable("Unexpected directive."); 9050 } 9051 } 9052 9053 return nullptr; 9054 } 9055 9056 /// Emit the user-defined mapper function. The code generation follows the 9057 /// pattern in the example below. 9058 /// \code 9059 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9060 /// void *base, void *begin, 9061 /// int64_t size, int64_t type) { 9062 /// // Allocate space for an array section first. 9063 /// if (size > 1 && !maptype.IsDelete) 9064 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9065 /// size*sizeof(Ty), clearToFrom(type)); 9066 /// // Map members. 
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function is emitted at most once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The five parameters are
  // (handle, base, begin, size, type), matching the pseudo-code above.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled name of the
  // mapped type and the mapper's own name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // NOTE(review): PtrBegin is derived from the address of the local slot that
  // holds the 'begin' parameter (GetAddrOfLocalVar(&BeginArg)), not from the
  // loaded BeginIn value, and the loop below advances it by one per element.
  // Confirm that iterating from the parameter's own storage is intended when
  // Size > 1.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB tracks the block that branches back to BodyBB; it is updated to the
  // last "omp.type.end" block emitted for the components below.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift that count into the position of the MEMBER_OF field so it can be
  // added to the map types of member components below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    // Despite its name, IsMember is true when the MEMBER_OF field is zero,
    // i.e. when this component is NOT a member; that path skips the combine
    // block.
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    // Two incoming values are added here, although room for four is reserved.
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // The value arriving from ToElseBB is the tofrom case: MemberMapType is
    // passed through unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the emitted function for this declaration and, when a requesting
  // function is given, remember the declaration under that function as well.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section.
/// If \a IsInit is true, and \a MapType indicates to not delete this array,
/// array initialization code is generated. If \a IsInit is false, and
/// \a MapType indicates to delete this array, array deletion code is
/// generated. In every other case control is transferred to \a ExitBB without
/// calling the runtime.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  // Suffix used in the names of the emitted blocks and values.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // NOTE(review): this tests Size >= 1, while the pseudo-code in the header
  // comment of emitUserDefinedMapper says "size > 1" -- confirm which of the
  // two is intended.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  // For initialization the body runs when the delete bit is clear; for
  // deletion it runs when the delete bit is set.
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function recorded in UDMMap for \p D. If none is
/// recorded yet, emitUserDefinedMapper is called first and the map is looked
/// up again.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

/// Emit a call to __kmpc_push_target_tripcount with \p DeviceID and the
/// iteration count produced by \p SizeEmitter for the loop directive that is
/// either \p D itself or the distribute directive nested in it. Nothing is
/// emitted when no such directive is found or when \p SizeEmitter returns
/// null.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. D itself is used only
  // when its kind is both a distribute and a teams directive.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *Args[] = {DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
9461 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9462 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9463 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9464 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9465 // Reverse offloading is not supported, so just execute on the host. 9466 if (RequiresOuterTask) { 9467 CapturedVars.clear(); 9468 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9469 } 9470 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9471 return; 9472 } 9473 9474 // On top of the arrays that were filled up, the target offloading call 9475 // takes as arguments the device id as well as the host pointer. The host 9476 // pointer is used by the runtime library to identify the current target 9477 // region, so it only has to be unique and not necessarily point to 9478 // anything. It could be the pointer to the outlined function that 9479 // implements the target region, but we aren't using that so that the 9480 // compiler doesn't need to keep that, and could therefore inline the host 9481 // function if proven worthwhile during optimization. 9482 9483 // From this point on, we need to have an ID of the target region defined. 9484 assert(OutlinedFnID && "Invalid outlined function ID!"); 9485 9486 // Emit device ID if any. 9487 llvm::Value *DeviceID; 9488 if (Device.getPointer()) { 9489 assert((Device.getInt() == OMPC_DEVICE_unknown || 9490 Device.getInt() == OMPC_DEVICE_device_num) && 9491 "Expected device_num modifier."); 9492 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 9493 DeviceID = 9494 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 9495 } else { 9496 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9497 } 9498 9499 // Emit the number of elements in the offloading arrays. 9500 llvm::Value *PointerNum = 9501 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9502 9503 // Return value of the runtime offloading call. 
9504 llvm::Value *Return; 9505 9506 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9507 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9508 9509 // Emit tripcount for the target loop-based directive. 9510 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9511 9512 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9513 // The target region is an outlined function launched by the runtime 9514 // via calls __tgt_target() or __tgt_target_teams(). 9515 // 9516 // __tgt_target() launches a target region with one team and one thread, 9517 // executing a serial region. This master thread may in turn launch 9518 // more threads within its team upon encountering a parallel region, 9519 // however, no additional teams can be launched on the device. 9520 // 9521 // __tgt_target_teams() launches a target region with one or more teams, 9522 // each with one or more threads. This call is required for target 9523 // constructs such as: 9524 // 'target teams' 9525 // 'target' / 'teams' 9526 // 'target teams distribute parallel for' 9527 // 'target parallel' 9528 // and so on. 9529 // 9530 // Note that on the host and CPU targets, the runtime implementation of 9531 // these calls simply call the outlined function without forking threads. 9532 // The outlined functions themselves have runtime calls to 9533 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9534 // the compiler in emitTeamsCall() and emitParallelCall(). 9535 // 9536 // In contrast, on the NVPTX target, the implementation of 9537 // __tgt_target_teams() launches a GPU kernel with the requested number 9538 // of teams and threads so no additional calls to the runtime are required. 9539 if (NumTeams) { 9540 // If we have NumTeams defined this means that we have an enclosed teams 9541 // region. Therefore we also expect to have NumThreads defined. 
These two 9542 // values should be defined in the presence of a teams directive, 9543 // regardless of having any clauses associated. If the user is using teams 9544 // but no clauses, these two values will be the default that should be 9545 // passed to the runtime library - a 32-bit integer with the value zero. 9546 assert(NumThreads && "Thread limit expression should be available along " 9547 "with number of teams."); 9548 llvm::Value *OffloadingArgs[] = {DeviceID, 9549 OutlinedFnID, 9550 PointerNum, 9551 InputInfo.BasePointersArray.getPointer(), 9552 InputInfo.PointersArray.getPointer(), 9553 InputInfo.SizesArray.getPointer(), 9554 MapTypesArray, 9555 InputInfo.MappersArray.getPointer(), 9556 NumTeams, 9557 NumThreads}; 9558 Return = CGF.EmitRuntimeCall( 9559 OMPBuilder.getOrCreateRuntimeFunction( 9560 CGM.getModule(), HasNowait 9561 ? OMPRTL___tgt_target_teams_nowait_mapper 9562 : OMPRTL___tgt_target_teams_mapper), 9563 OffloadingArgs); 9564 } else { 9565 llvm::Value *OffloadingArgs[] = {DeviceID, 9566 OutlinedFnID, 9567 PointerNum, 9568 InputInfo.BasePointersArray.getPointer(), 9569 InputInfo.PointersArray.getPointer(), 9570 InputInfo.SizesArray.getPointer(), 9571 MapTypesArray, 9572 InputInfo.MappersArray.getPointer()}; 9573 Return = CGF.EmitRuntimeCall( 9574 OMPBuilder.getOrCreateRuntimeFunction( 9575 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper 9576 : OMPRTL___tgt_target_mapper), 9577 OffloadingArgs); 9578 } 9579 9580 // Check the error code and execute the host version if required. 
9581 llvm::BasicBlock *OffloadFailedBlock = 9582 CGF.createBasicBlock("omp_offload.failed"); 9583 llvm::BasicBlock *OffloadContBlock = 9584 CGF.createBasicBlock("omp_offload.cont"); 9585 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9586 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9587 9588 CGF.EmitBlock(OffloadFailedBlock); 9589 if (RequiresOuterTask) { 9590 CapturedVars.clear(); 9591 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9592 } 9593 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9594 CGF.EmitBranch(OffloadContBlock); 9595 9596 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9597 }; 9598 9599 // Notify that the host version must be executed. 9600 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9601 RequiresOuterTask](CodeGenFunction &CGF, 9602 PrePostActionTy &) { 9603 if (RequiresOuterTask) { 9604 CapturedVars.clear(); 9605 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9606 } 9607 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9608 }; 9609 9610 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9611 &CapturedVars, RequiresOuterTask, 9612 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9613 // Fill up the arrays with all the captured variables. 9614 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 9615 9616 // Get mappable expression information. 
9617 MappableExprsHandler MEHandler(D, CGF); 9618 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9619 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; 9620 9621 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9622 auto CV = CapturedVars.begin(); 9623 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9624 CE = CS.capture_end(); 9625 CI != CE; ++CI, ++RI, ++CV) { 9626 MappableExprsHandler::MapCombinedInfoTy CurInfo; 9627 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9628 9629 // VLA sizes are passed to the outlined region by copy and do not have map 9630 // information associated. 9631 if (CI->capturesVariableArrayType()) { 9632 CurInfo.BasePointers.push_back(*CV); 9633 CurInfo.Pointers.push_back(*CV); 9634 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 9635 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9636 // Copy to the device as an argument. No need to retrieve it. 9637 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9638 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9639 MappableExprsHandler::OMP_MAP_IMPLICIT); 9640 CurInfo.Mappers.push_back(nullptr); 9641 } else { 9642 // If we have any information in the map clause, we use it, otherwise we 9643 // just do a default mapping. 9644 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); 9645 if (!CI->capturesThis()) 9646 MappedVarSet.insert(CI->getCapturedVar()); 9647 else 9648 MappedVarSet.insert(nullptr); 9649 if (CurInfo.BasePointers.empty()) 9650 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); 9651 // Generate correct mapping for variables captured by reference in 9652 // lambdas. 9653 if (CI->capturesVariable()) 9654 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, 9655 CurInfo, LambdaPointers); 9656 } 9657 // We expect to have at least an element of information for this capture. 
9658 assert(!CurInfo.BasePointers.empty() && 9659 "Non-existing map pointer for capture!"); 9660 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && 9661 CurInfo.BasePointers.size() == CurInfo.Sizes.size() && 9662 CurInfo.BasePointers.size() == CurInfo.Types.size() && 9663 CurInfo.BasePointers.size() == CurInfo.Mappers.size() && 9664 "Inconsistent map information sizes!"); 9665 9666 // If there is an entry in PartialStruct it means we have a struct with 9667 // individual members mapped. Emit an extra combined entry. 9668 if (PartialStruct.Base.isValid()) 9669 MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct); 9670 9671 // We need to append the results of this capture to what we already have. 9672 CombinedInfo.append(CurInfo); 9673 } 9674 // Adjust MEMBER_OF flags for the lambdas captures. 9675 MEHandler.adjustMemberOfForLambdaCaptures( 9676 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, 9677 CombinedInfo.Types); 9678 // Map any list items in a map clause that were not captures because they 9679 // weren't referenced within the construct. 9680 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); 9681 9682 TargetDataInfo Info; 9683 // Fill up the arrays and create the arguments. 
9684 emitOffloadingArrays(CGF, CombinedInfo, Info); 9685 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9686 Info.PointersArray, Info.SizesArray, 9687 Info.MapTypesArray, Info.MappersArray, Info); 9688 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9689 InputInfo.BasePointersArray = 9690 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9691 InputInfo.PointersArray = 9692 Address(Info.PointersArray, CGM.getPointerAlign()); 9693 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9694 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 9695 MapTypesArray = Info.MapTypesArray; 9696 if (RequiresOuterTask) 9697 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9698 else 9699 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9700 }; 9701 9702 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9703 CodeGenFunction &CGF, PrePostActionTy &) { 9704 if (RequiresOuterTask) { 9705 CodeGenFunction::OMPTargetDataInfo InputInfo; 9706 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9707 } else { 9708 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9709 } 9710 }; 9711 9712 // If we have a target function ID it means that we need to support 9713 // offloading, otherwise, just execute on the host. We need to execute on host 9714 // regardless of the conditional in the if clause if, e.g., the user do not 9715 // specify target triples. 9716 if (OutlinedFnID) { 9717 if (IfCond) { 9718 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9719 } else { 9720 RegionCodeGenTy ThenRCG(TargetThenGen); 9721 ThenRCG(CGF); 9722 } 9723 } else { 9724 RegionCodeGenTy ElseRCG(TargetElseGen); 9725 ElseRCG(CGF); 9726 } 9727 } 9728 9729 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9730 StringRef ParentName) { 9731 if (!S) 9732 return; 9733 9734 // Codegen OMP target directives that offload compute to the device. 
9735 bool RequiresDeviceCodegen = 9736 isa<OMPExecutableDirective>(S) && 9737 isOpenMPTargetExecutionDirective( 9738 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9739 9740 if (RequiresDeviceCodegen) { 9741 const auto &E = *cast<OMPExecutableDirective>(S); 9742 unsigned DeviceID; 9743 unsigned FileID; 9744 unsigned Line; 9745 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9746 FileID, Line); 9747 9748 // Is this a target region that should not be emitted as an entry point? If 9749 // so just signal we are done with this target region. 9750 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9751 ParentName, Line)) 9752 return; 9753 9754 switch (E.getDirectiveKind()) { 9755 case OMPD_target: 9756 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9757 cast<OMPTargetDirective>(E)); 9758 break; 9759 case OMPD_target_parallel: 9760 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9761 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9762 break; 9763 case OMPD_target_teams: 9764 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9765 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9766 break; 9767 case OMPD_target_teams_distribute: 9768 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9769 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9770 break; 9771 case OMPD_target_teams_distribute_simd: 9772 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9773 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9774 break; 9775 case OMPD_target_parallel_for: 9776 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9777 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9778 break; 9779 case OMPD_target_parallel_for_simd: 9780 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9781 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9782 break; 9783 case OMPD_target_simd: 9784 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9785 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9786 break; 9787 case OMPD_target_teams_distribute_parallel_for: 9788 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9789 CGM, ParentName, 9790 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9791 break; 9792 case OMPD_target_teams_distribute_parallel_for_simd: 9793 CodeGenFunction:: 9794 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9795 CGM, ParentName, 9796 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9797 break; 9798 case OMPD_parallel: 9799 case OMPD_for: 9800 case OMPD_parallel_for: 9801 case OMPD_parallel_master: 9802 case OMPD_parallel_sections: 9803 case OMPD_for_simd: 9804 case OMPD_parallel_for_simd: 9805 case OMPD_cancel: 9806 case OMPD_cancellation_point: 9807 case OMPD_ordered: 9808 case OMPD_threadprivate: 9809 case OMPD_allocate: 9810 case OMPD_task: 9811 case OMPD_simd: 9812 case OMPD_sections: 9813 case OMPD_section: 9814 case OMPD_single: 9815 case OMPD_master: 9816 case OMPD_critical: 9817 case OMPD_taskyield: 9818 case OMPD_barrier: 9819 case OMPD_taskwait: 9820 case OMPD_taskgroup: 9821 case OMPD_atomic: 9822 case OMPD_flush: 9823 case OMPD_depobj: 9824 case OMPD_scan: 9825 case OMPD_teams: 9826 case OMPD_target_data: 9827 case OMPD_target_exit_data: 9828 case OMPD_target_enter_data: 9829 case OMPD_distribute: 9830 case OMPD_distribute_simd: 9831 case OMPD_distribute_parallel_for: 9832 case OMPD_distribute_parallel_for_simd: 9833 case OMPD_teams_distribute: 9834 case OMPD_teams_distribute_simd: 9835 case OMPD_teams_distribute_parallel_for: 9836 case OMPD_teams_distribute_parallel_for_simd: 9837 case OMPD_target_update: 9838 case OMPD_declare_simd: 9839 case OMPD_declare_variant: 9840 case OMPD_begin_declare_variant: 9841 case OMPD_end_declare_variant: 9842 case OMPD_declare_target: 9843 case OMPD_end_declare_target: 9844 case OMPD_declare_reduction: 9845 case OMPD_declare_mapper: 
9846 case OMPD_taskloop: 9847 case OMPD_taskloop_simd: 9848 case OMPD_master_taskloop: 9849 case OMPD_master_taskloop_simd: 9850 case OMPD_parallel_master_taskloop: 9851 case OMPD_parallel_master_taskloop_simd: 9852 case OMPD_requires: 9853 case OMPD_unknown: 9854 default: 9855 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9856 } 9857 return; 9858 } 9859 9860 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9861 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9862 return; 9863 9864 scanForTargetRegionsFunctions( 9865 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9866 return; 9867 } 9868 9869 // If this is a lambda function, look into its body. 9870 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9871 S = L->getBody(); 9872 9873 // Keep looking for target regions recursively. 9874 for (const Stmt *II : S->children()) 9875 scanForTargetRegionsFunctions(II, ParentName); 9876 } 9877 9878 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9879 // If emitting code for the host, we do not process FD here. Instead we do 9880 // the normal code generation. 9881 if (!CGM.getLangOpts().OpenMPIsDevice) { 9882 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9883 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9884 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9885 // Do not emit device_type(nohost) functions for the host. 9886 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9887 return true; 9888 } 9889 return false; 9890 } 9891 9892 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9893 // Try to detect target regions in the function. 9894 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9895 StringRef Name = CGM.getMangledName(GD); 9896 scanForTargetRegionsFunctions(FD->getBody(), Name); 9897 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9898 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9899 // Do not emit device_type(nohost) functions for the host. 
9900 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9901 return true; 9902 } 9903 9904 // Do not to emit function if it is not marked as declare target. 9905 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9906 AlreadyEmittedTargetDecls.count(VD) == 0; 9907 } 9908 9909 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9910 if (!CGM.getLangOpts().OpenMPIsDevice) 9911 return false; 9912 9913 // Check if there are Ctors/Dtors in this declaration and look for target 9914 // regions in it. We use the complete variant to produce the kernel name 9915 // mangling. 9916 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9917 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9918 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9919 StringRef ParentName = 9920 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9921 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9922 } 9923 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9924 StringRef ParentName = 9925 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9926 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9927 } 9928 } 9929 9930 // Do not to emit variable if it is not marked as declare target. 
9931 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9932 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9933 cast<VarDecl>(GD.getDecl())); 9934 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9935 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9936 HasRequiresUnifiedSharedMemory)) { 9937 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9938 return true; 9939 } 9940 return false; 9941 } 9942 9943 llvm::Constant * 9944 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9945 const VarDecl *VD) { 9946 assert(VD->getType().isConstant(CGM.getContext()) && 9947 "Expected constant variable."); 9948 StringRef VarName; 9949 llvm::Constant *Addr; 9950 llvm::GlobalValue::LinkageTypes Linkage; 9951 QualType Ty = VD->getType(); 9952 SmallString<128> Buffer; 9953 { 9954 unsigned DeviceID; 9955 unsigned FileID; 9956 unsigned Line; 9957 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9958 FileID, Line); 9959 llvm::raw_svector_ostream OS(Buffer); 9960 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9961 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9962 VarName = OS.str(); 9963 } 9964 Linkage = llvm::GlobalValue::InternalLinkage; 9965 Addr = 9966 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9967 getDefaultFirstprivateAddressSpace()); 9968 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9969 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9970 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9971 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9972 VarName, Addr, VarSize, 9973 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9974 return Addr; 9975 } 9976 9977 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9978 llvm::Constant *Addr) { 9979 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9980 !CGM.getLangOpts().OpenMPIsDevice) 9981 return; 9982 
llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9983 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9984 if (!Res) { 9985 if (CGM.getLangOpts().OpenMPIsDevice) { 9986 // Register non-target variables being emitted in device code (debug info 9987 // may cause this). 9988 StringRef VarName = CGM.getMangledName(VD); 9989 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9990 } 9991 return; 9992 } 9993 // Register declare target variables. 9994 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9995 StringRef VarName; 9996 CharUnits VarSize; 9997 llvm::GlobalValue::LinkageTypes Linkage; 9998 9999 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10000 !HasRequiresUnifiedSharedMemory) { 10001 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10002 VarName = CGM.getMangledName(VD); 10003 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10004 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10005 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10006 } else { 10007 VarSize = CharUnits::Zero(); 10008 } 10009 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10010 // Temp solution to prevent optimizations of the internal variables. 
10011 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10012 std::string RefName = getName({VarName, "ref"}); 10013 if (!CGM.GetGlobalValue(RefName)) { 10014 llvm::Constant *AddrRef = 10015 getOrCreateInternalVariable(Addr->getType(), RefName); 10016 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10017 GVAddrRef->setConstant(/*Val=*/true); 10018 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10019 GVAddrRef->setInitializer(Addr); 10020 CGM.addCompilerUsedGlobal(GVAddrRef); 10021 } 10022 } 10023 } else { 10024 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10025 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10026 HasRequiresUnifiedSharedMemory)) && 10027 "Declare target attribute must link or to with unified memory."); 10028 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10029 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10030 else 10031 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10032 10033 if (CGM.getLangOpts().OpenMPIsDevice) { 10034 VarName = Addr->getName(); 10035 Addr = nullptr; 10036 } else { 10037 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10038 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10039 } 10040 VarSize = CGM.getPointerSize(); 10041 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10042 } 10043 10044 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10045 VarName, Addr, VarSize, Flags, Linkage); 10046 } 10047 10048 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10049 if (isa<FunctionDecl>(GD.getDecl()) || 10050 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10051 return emitTargetFunctions(GD); 10052 10053 return emitTargetGlobalVariable(GD); 10054 } 10055 10056 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10057 for (const VarDecl *VD : DeferredGlobalVariables) { 10058 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10059 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10060 if (!Res) 
10061 continue; 10062 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10063 !HasRequiresUnifiedSharedMemory) { 10064 CGM.EmitGlobal(VD); 10065 } else { 10066 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10067 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10068 HasRequiresUnifiedSharedMemory)) && 10069 "Expected link clause or to clause with unified memory."); 10070 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10071 } 10072 } 10073 } 10074 10075 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10076 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10077 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10078 " Expected target-based directive."); 10079 } 10080 10081 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10082 for (const OMPClause *Clause : D->clauselists()) { 10083 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10084 HasRequiresUnifiedSharedMemory = true; 10085 } else if (const auto *AC = 10086 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10087 switch (AC->getAtomicDefaultMemOrderKind()) { 10088 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10089 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10090 break; 10091 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10092 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10093 break; 10094 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10095 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10096 break; 10097 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10098 break; 10099 } 10100 } 10101 } 10102 } 10103 10104 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10105 return RequiresAtomicOrdering; 10106 } 10107 10108 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10109 LangAS &AS) { 10110 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10111 return false; 10112 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10113 
switch(A->getAllocatorType()) { 10114 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10115 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10116 // Not supported, fallback to the default mem space. 10117 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10118 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10119 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10120 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10121 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10122 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10123 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10124 AS = LangAS::Default; 10125 return true; 10126 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10127 llvm_unreachable("Expected predefined allocator for the variables with the " 10128 "static storage."); 10129 } 10130 return false; 10131 } 10132 10133 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10134 return HasRequiresUnifiedSharedMemory; 10135 } 10136 10137 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10138 CodeGenModule &CGM) 10139 : CGM(CGM) { 10140 if (CGM.getLangOpts().OpenMPIsDevice) { 10141 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10142 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10143 } 10144 } 10145 10146 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10147 if (CGM.getLangOpts().OpenMPIsDevice) 10148 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10149 } 10150 10151 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10152 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10153 return true; 10154 10155 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10156 // Do not to emit function if it is marked as declare target as it was already 10157 // emitted. 
10158 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10159 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10160 if (auto *F = dyn_cast_or_null<llvm::Function>( 10161 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10162 return !F->isDeclaration(); 10163 return false; 10164 } 10165 return true; 10166 } 10167 10168 return !AlreadyEmittedTargetDecls.insert(D).second; 10169 } 10170 10171 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10172 // If we don't have entries or if we are emitting code for the device, we 10173 // don't need to do anything. 10174 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10175 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10176 (OffloadEntriesInfoManager.empty() && 10177 !HasEmittedDeclareTargetRegion && 10178 !HasEmittedTargetRegion)) 10179 return nullptr; 10180 10181 // Create and register the function that handles the requires directives. 10182 ASTContext &C = CGM.getContext(); 10183 10184 llvm::Function *RequiresRegFn; 10185 { 10186 CodeGenFunction CGF(CGM); 10187 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10188 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10189 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10190 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10191 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10192 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10193 // TODO: check for other requires clauses. 10194 // The requires directive takes effect only when a target region is 10195 // present in the compilation unit. Otherwise it is ignored and not 10196 // passed to the runtime. This avoids the runtime from throwing an error 10197 // for mismatching requires clauses across compilation units that don't 10198 // contain at least 1 target region. 
10199 assert((HasEmittedTargetRegion || 10200 HasEmittedDeclareTargetRegion || 10201 !OffloadEntriesInfoManager.empty()) && 10202 "Target or declare target region expected."); 10203 if (HasRequiresUnifiedSharedMemory) 10204 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10205 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10206 CGM.getModule(), OMPRTL___tgt_register_requires), 10207 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10208 CGF.FinishFunction(); 10209 } 10210 return RequiresRegFn; 10211 } 10212 10213 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10214 const OMPExecutableDirective &D, 10215 SourceLocation Loc, 10216 llvm::Function *OutlinedFn, 10217 ArrayRef<llvm::Value *> CapturedVars) { 10218 if (!CGF.HaveInsertPoint()) 10219 return; 10220 10221 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10222 CodeGenFunction::RunCleanupsScope Scope(CGF); 10223 10224 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10225 llvm::Value *Args[] = { 10226 RTLoc, 10227 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10228 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10229 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10230 RealArgs.append(std::begin(Args), std::end(Args)); 10231 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10232 10233 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10234 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10235 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10236 } 10237 10238 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10239 const Expr *NumTeams, 10240 const Expr *ThreadLimit, 10241 SourceLocation Loc) { 10242 if (!CGF.HaveInsertPoint()) 10243 return; 10244 10245 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10246 10247 llvm::Value *NumTeamsVal = 10248 NumTeams 10249 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10250 CGF.CGM.Int32Ty, /* isSigned = */ true) 10251 : CGF.Builder.getInt32(0); 10252 10253 llvm::Value *ThreadLimitVal = 10254 ThreadLimit 10255 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10256 CGF.CGM.Int32Ty, /* isSigned = */ true) 10257 : CGF.Builder.getInt32(0); 10258 10259 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10260 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10261 ThreadLimitVal}; 10262 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10263 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10264 PushNumTeamsArgs); 10265 } 10266 10267 void CGOpenMPRuntime::emitTargetDataCalls( 10268 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10269 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10270 if (!CGF.HaveInsertPoint()) 10271 return; 10272 10273 // Action used to replace the default codegen action and turn privatization 10274 // off. 10275 PrePostActionTy NoPrivAction; 10276 10277 // Generate the code for the opening of the data environment. Capture all the 10278 // arguments of the runtime call by reference because they are used in the 10279 // closing of the region. 10280 auto &&BeginThenGen = [this, &D, Device, &Info, 10281 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10282 // Fill up the arrays with all the mapped variables. 10283 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10284 10285 // Get map clause information. 10286 MappableExprsHandler MEHandler(D, CGF); 10287 MEHandler.generateAllInfo(CombinedInfo); 10288 10289 // Fill up the arrays and create the arguments. 
10290 emitOffloadingArrays(CGF, CombinedInfo, Info); 10291 10292 llvm::Value *BasePointersArrayArg = nullptr; 10293 llvm::Value *PointersArrayArg = nullptr; 10294 llvm::Value *SizesArrayArg = nullptr; 10295 llvm::Value *MapTypesArrayArg = nullptr; 10296 llvm::Value *MappersArrayArg = nullptr; 10297 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10298 SizesArrayArg, MapTypesArrayArg, 10299 MappersArrayArg, Info, /*ForEndCall=*/false); 10300 10301 // Emit device ID if any. 10302 llvm::Value *DeviceID = nullptr; 10303 if (Device) { 10304 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10305 CGF.Int64Ty, /*isSigned=*/true); 10306 } else { 10307 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10308 } 10309 10310 // Emit the number of elements in the offloading arrays. 10311 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10312 10313 llvm::Value *OffloadingArgs[] = { 10314 DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, 10315 SizesArrayArg, MapTypesArrayArg, MappersArrayArg}; 10316 CGF.EmitRuntimeCall( 10317 OMPBuilder.getOrCreateRuntimeFunction( 10318 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 10319 OffloadingArgs); 10320 10321 // If device pointer privatization is required, emit the body of the region 10322 // here. It will have to be duplicated: with and without privatization. 10323 if (!Info.CaptureDeviceAddrMap.empty()) 10324 CodeGen(CGF); 10325 }; 10326 10327 // Generate code for the closing of the data region. 
10328 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10329 PrePostActionTy &) { 10330 assert(Info.isValid() && "Invalid data environment closing arguments."); 10331 10332 llvm::Value *BasePointersArrayArg = nullptr; 10333 llvm::Value *PointersArrayArg = nullptr; 10334 llvm::Value *SizesArrayArg = nullptr; 10335 llvm::Value *MapTypesArrayArg = nullptr; 10336 llvm::Value *MappersArrayArg = nullptr; 10337 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10338 SizesArrayArg, MapTypesArrayArg, 10339 MappersArrayArg, Info, /*ForEndCall=*/true); 10340 10341 // Emit device ID if any. 10342 llvm::Value *DeviceID = nullptr; 10343 if (Device) { 10344 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10345 CGF.Int64Ty, /*isSigned=*/true); 10346 } else { 10347 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10348 } 10349 10350 // Emit the number of elements in the offloading arrays. 10351 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10352 10353 llvm::Value *OffloadingArgs[] = { 10354 DeviceID, PointerNum, BasePointersArrayArg, PointersArrayArg, 10355 SizesArrayArg, MapTypesArrayArg, MappersArrayArg}; 10356 CGF.EmitRuntimeCall( 10357 OMPBuilder.getOrCreateRuntimeFunction( 10358 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 10359 OffloadingArgs); 10360 }; 10361 10362 // If we need device pointer privatization, we need to emit the body of the 10363 // region with no privatization in the 'else' branch of the conditional. 10364 // Otherwise, we don't have to do anything. 10365 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10366 PrePostActionTy &) { 10367 if (!Info.CaptureDeviceAddrMap.empty()) { 10368 CodeGen.setAction(NoPrivAction); 10369 CodeGen(CGF); 10370 } 10371 }; 10372 10373 // We don't have to do anything to close the region if the if clause evaluates 10374 // to false. 
10375 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10376 10377 if (IfCond) { 10378 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10379 } else { 10380 RegionCodeGenTy RCG(BeginThenGen); 10381 RCG(CGF); 10382 } 10383 10384 // If we don't require privatization of device pointers, we emit the body in 10385 // between the runtime calls. This avoids duplicating the body code. 10386 if (Info.CaptureDeviceAddrMap.empty()) { 10387 CodeGen.setAction(NoPrivAction); 10388 CodeGen(CGF); 10389 } 10390 10391 if (IfCond) { 10392 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10393 } else { 10394 RegionCodeGenTy RCG(EndThenGen); 10395 RCG(CGF); 10396 } 10397 } 10398 10399 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10400 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10401 const Expr *Device) { 10402 if (!CGF.HaveInsertPoint()) 10403 return; 10404 10405 assert((isa<OMPTargetEnterDataDirective>(D) || 10406 isa<OMPTargetExitDataDirective>(D) || 10407 isa<OMPTargetUpdateDirective>(D)) && 10408 "Expecting either target enter, exit data, or update directives."); 10409 10410 CodeGenFunction::OMPTargetDataInfo InputInfo; 10411 llvm::Value *MapTypesArray = nullptr; 10412 // Generate the code for the opening of the data environment. 10413 auto &&ThenGen = [this, &D, Device, &InputInfo, 10414 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10415 // Emit device ID if any. 10416 llvm::Value *DeviceID = nullptr; 10417 if (Device) { 10418 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10419 CGF.Int64Ty, /*isSigned=*/true); 10420 } else { 10421 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10422 } 10423 10424 // Emit the number of elements in the offloading arrays. 
10425 llvm::Constant *PointerNum = 10426 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10427 10428 llvm::Value *OffloadingArgs[] = {DeviceID, 10429 PointerNum, 10430 InputInfo.BasePointersArray.getPointer(), 10431 InputInfo.PointersArray.getPointer(), 10432 InputInfo.SizesArray.getPointer(), 10433 MapTypesArray, 10434 InputInfo.MappersArray.getPointer()}; 10435 10436 // Select the right runtime function call for each standalone 10437 // directive. 10438 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10439 RuntimeFunction RTLFn; 10440 switch (D.getDirectiveKind()) { 10441 case OMPD_target_enter_data: 10442 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10443 : OMPRTL___tgt_target_data_begin_mapper; 10444 break; 10445 case OMPD_target_exit_data: 10446 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10447 : OMPRTL___tgt_target_data_end_mapper; 10448 break; 10449 case OMPD_target_update: 10450 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper 10451 : OMPRTL___tgt_target_data_update_mapper; 10452 break; 10453 case OMPD_parallel: 10454 case OMPD_for: 10455 case OMPD_parallel_for: 10456 case OMPD_parallel_master: 10457 case OMPD_parallel_sections: 10458 case OMPD_for_simd: 10459 case OMPD_parallel_for_simd: 10460 case OMPD_cancel: 10461 case OMPD_cancellation_point: 10462 case OMPD_ordered: 10463 case OMPD_threadprivate: 10464 case OMPD_allocate: 10465 case OMPD_task: 10466 case OMPD_simd: 10467 case OMPD_sections: 10468 case OMPD_section: 10469 case OMPD_single: 10470 case OMPD_master: 10471 case OMPD_critical: 10472 case OMPD_taskyield: 10473 case OMPD_barrier: 10474 case OMPD_taskwait: 10475 case OMPD_taskgroup: 10476 case OMPD_atomic: 10477 case OMPD_flush: 10478 case OMPD_depobj: 10479 case OMPD_scan: 10480 case OMPD_teams: 10481 case OMPD_target_data: 10482 case OMPD_distribute: 10483 case OMPD_distribute_simd: 10484 case OMPD_distribute_parallel_for: 10485 case OMPD_distribute_parallel_for_simd: 
10486 case OMPD_teams_distribute: 10487 case OMPD_teams_distribute_simd: 10488 case OMPD_teams_distribute_parallel_for: 10489 case OMPD_teams_distribute_parallel_for_simd: 10490 case OMPD_declare_simd: 10491 case OMPD_declare_variant: 10492 case OMPD_begin_declare_variant: 10493 case OMPD_end_declare_variant: 10494 case OMPD_declare_target: 10495 case OMPD_end_declare_target: 10496 case OMPD_declare_reduction: 10497 case OMPD_declare_mapper: 10498 case OMPD_taskloop: 10499 case OMPD_taskloop_simd: 10500 case OMPD_master_taskloop: 10501 case OMPD_master_taskloop_simd: 10502 case OMPD_parallel_master_taskloop: 10503 case OMPD_parallel_master_taskloop_simd: 10504 case OMPD_target: 10505 case OMPD_target_simd: 10506 case OMPD_target_teams_distribute: 10507 case OMPD_target_teams_distribute_simd: 10508 case OMPD_target_teams_distribute_parallel_for: 10509 case OMPD_target_teams_distribute_parallel_for_simd: 10510 case OMPD_target_teams: 10511 case OMPD_target_parallel: 10512 case OMPD_target_parallel_for: 10513 case OMPD_target_parallel_for_simd: 10514 case OMPD_requires: 10515 case OMPD_unknown: 10516 default: 10517 llvm_unreachable("Unexpected standalone target data directive."); 10518 break; 10519 } 10520 CGF.EmitRuntimeCall( 10521 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 10522 OffloadingArgs); 10523 }; 10524 10525 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10526 CodeGenFunction &CGF, PrePostActionTy &) { 10527 // Fill up the arrays with all the mapped variables. 10528 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10529 10530 // Get map clause information. 10531 MappableExprsHandler MEHandler(D, CGF); 10532 MEHandler.generateAllInfo(CombinedInfo); 10533 10534 TargetDataInfo Info; 10535 // Fill up the arrays and create the arguments. 
10536 emitOffloadingArrays(CGF, CombinedInfo, Info); 10537 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10538 Info.PointersArray, Info.SizesArray, 10539 Info.MapTypesArray, Info.MappersArray, Info); 10540 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10541 InputInfo.BasePointersArray = 10542 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10543 InputInfo.PointersArray = 10544 Address(Info.PointersArray, CGM.getPointerAlign()); 10545 InputInfo.SizesArray = 10546 Address(Info.SizesArray, CGM.getPointerAlign()); 10547 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 10548 MapTypesArray = Info.MapTypesArray; 10549 if (D.hasClausesOfKind<OMPDependClause>()) 10550 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10551 else 10552 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10553 }; 10554 10555 if (IfCond) { 10556 emitIfClause(CGF, IfCond, TargetThenGen, 10557 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10558 } else { 10559 RegionCodeGenTy ThenRCG(TargetThenGen); 10560 ThenRCG(CGF); 10561 } 10562 } 10563 10564 namespace { 10565 /// Kind of parameter in a function with 'declare simd' directive. 10566 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10567 /// Attribute set of the parameter. 10568 struct ParamAttrTy { 10569 ParamKindTy Kind = Vector; 10570 llvm::APSInt StrideOrArg; 10571 llvm::APSInt Alignment; 10572 }; 10573 } // namespace 10574 10575 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10576 ArrayRef<ParamAttrTy> ParamAttrs) { 10577 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10578 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10579 // of that clause. The VLEN value must be power of 2. 10580 // In other case the notion of the function`s "characteristic data type" (CDT) 10581 // is used to compute the vector length. 
10582 // CDT is defined in the following order: 10583 // a) For non-void function, the CDT is the return type. 10584 // b) If the function has any non-uniform, non-linear parameters, then the 10585 // CDT is the type of the first such parameter. 10586 // c) If the CDT determined by a) or b) above is struct, union, or class 10587 // type which is pass-by-value (except for the type that maps to the 10588 // built-in complex data type), the characteristic data type is int. 10589 // d) If none of the above three cases is applicable, the CDT is int. 10590 // The VLEN is then determined based on the CDT and the size of vector 10591 // register of that ISA for which current vector version is generated. The 10592 // VLEN is computed using the formula below: 10593 // VLEN = sizeof(vector_register) / sizeof(CDT), 10594 // where vector register size specified in section 3.2.1 Registers and the 10595 // Stack Frame of original AMD64 ABI document. 10596 QualType RetType = FD->getReturnType(); 10597 if (RetType.isNull()) 10598 return 0; 10599 ASTContext &C = FD->getASTContext(); 10600 QualType CDT; 10601 if (!RetType.isNull() && !RetType->isVoidType()) { 10602 CDT = RetType; 10603 } else { 10604 unsigned Offset = 0; 10605 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10606 if (ParamAttrs[Offset].Kind == Vector) 10607 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10608 ++Offset; 10609 } 10610 if (CDT.isNull()) { 10611 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10612 if (ParamAttrs[I + Offset].Kind == Vector) { 10613 CDT = FD->getParamDecl(I)->getType(); 10614 break; 10615 } 10616 } 10617 } 10618 } 10619 if (CDT.isNull()) 10620 CDT = C.IntTy; 10621 CDT = CDT->getCanonicalTypeUnqualified(); 10622 if (CDT->isRecordType() || CDT->isUnionType()) 10623 CDT = C.IntTy; 10624 return C.getTypeSize(CDT); 10625 } 10626 10627 static void 10628 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10629 const llvm::APSInt &VLENVal, 10630 
ArrayRef<ParamAttrTy> ParamAttrs, 10631 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10632 struct ISADataTy { 10633 char ISA; 10634 unsigned VecRegSize; 10635 }; 10636 ISADataTy ISAData[] = { 10637 { 10638 'b', 128 10639 }, // SSE 10640 { 10641 'c', 256 10642 }, // AVX 10643 { 10644 'd', 256 10645 }, // AVX2 10646 { 10647 'e', 512 10648 }, // AVX512 10649 }; 10650 llvm::SmallVector<char, 2> Masked; 10651 switch (State) { 10652 case OMPDeclareSimdDeclAttr::BS_Undefined: 10653 Masked.push_back('N'); 10654 Masked.push_back('M'); 10655 break; 10656 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10657 Masked.push_back('N'); 10658 break; 10659 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10660 Masked.push_back('M'); 10661 break; 10662 } 10663 for (char Mask : Masked) { 10664 for (const ISADataTy &Data : ISAData) { 10665 SmallString<256> Buffer; 10666 llvm::raw_svector_ostream Out(Buffer); 10667 Out << "_ZGV" << Data.ISA << Mask; 10668 if (!VLENVal) { 10669 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10670 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10671 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10672 } else { 10673 Out << VLENVal; 10674 } 10675 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10676 switch (ParamAttr.Kind){ 10677 case LinearWithVarStride: 10678 Out << 's' << ParamAttr.StrideOrArg; 10679 break; 10680 case Linear: 10681 Out << 'l'; 10682 if (ParamAttr.StrideOrArg != 1) 10683 Out << ParamAttr.StrideOrArg; 10684 break; 10685 case Uniform: 10686 Out << 'u'; 10687 break; 10688 case Vector: 10689 Out << 'v'; 10690 break; 10691 } 10692 if (!!ParamAttr.Alignment) 10693 Out << 'a' << ParamAttr.Alignment; 10694 } 10695 Out << '_' << Fn->getName(); 10696 Fn->addFnAttr(Out.str()); 10697 } 10698 } 10699 } 10700 10701 // This are the Functions that are needed to mangle the name of the 10702 // vector functions generated by the compiler, according to the rules 10703 // defined in the "Vector Function ABI specifications for AArch64", 
10704 // available at 10705 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10706 10707 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10708 /// 10709 /// TODO: Need to implement the behavior for reference marked with a 10710 /// var or no linear modifiers (1.b in the section). For this, we 10711 /// need to extend ParamKindTy to support the linear modifiers. 10712 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10713 QT = QT.getCanonicalType(); 10714 10715 if (QT->isVoidType()) 10716 return false; 10717 10718 if (Kind == ParamKindTy::Uniform) 10719 return false; 10720 10721 if (Kind == ParamKindTy::Linear) 10722 return false; 10723 10724 // TODO: Handle linear references with modifiers 10725 10726 if (Kind == ParamKindTy::LinearWithVarStride) 10727 return false; 10728 10729 return true; 10730 } 10731 10732 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10733 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10734 QT = QT.getCanonicalType(); 10735 unsigned Size = C.getTypeSize(QT); 10736 10737 // Only scalars and complex within 16 bytes wide set PVB to true. 10738 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10739 return false; 10740 10741 if (QT->isFloatingType()) 10742 return true; 10743 10744 if (QT->isIntegerType()) 10745 return true; 10746 10747 if (QT->isPointerType()) 10748 return true; 10749 10750 // TODO: Add support for complex types (section 3.1.2, item 2). 10751 10752 return false; 10753 } 10754 10755 /// Computes the lane size (LS) of a return type or of an input parameter, 10756 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10757 /// TODO: Add support for references, section 3.2.1, item 1. 
10758 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10759 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10760 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10761 if (getAArch64PBV(PTy, C)) 10762 return C.getTypeSize(PTy); 10763 } 10764 if (getAArch64PBV(QT, C)) 10765 return C.getTypeSize(QT); 10766 10767 return C.getTypeSize(C.getUIntPtrType()); 10768 } 10769 10770 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10771 // signature of the scalar function, as defined in 3.2.2 of the 10772 // AAVFABI. 10773 static std::tuple<unsigned, unsigned, bool> 10774 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10775 QualType RetType = FD->getReturnType().getCanonicalType(); 10776 10777 ASTContext &C = FD->getASTContext(); 10778 10779 bool OutputBecomesInput = false; 10780 10781 llvm::SmallVector<unsigned, 8> Sizes; 10782 if (!RetType->isVoidType()) { 10783 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10784 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10785 OutputBecomesInput = true; 10786 } 10787 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10788 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10789 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10790 } 10791 10792 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10793 // The LS of a function parameter / return value can only be a power 10794 // of 2, starting from 8 bits, up to 128. 
10795 assert(std::all_of(Sizes.begin(), Sizes.end(), 10796 [](unsigned Size) { 10797 return Size == 8 || Size == 16 || Size == 32 || 10798 Size == 64 || Size == 128; 10799 }) && 10800 "Invalid size"); 10801 10802 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10803 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10804 OutputBecomesInput); 10805 } 10806 10807 /// Mangle the parameter part of the vector function name according to 10808 /// their OpenMP classification. The mangling function is defined in 10809 /// section 3.5 of the AAVFABI. 10810 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10811 SmallString<256> Buffer; 10812 llvm::raw_svector_ostream Out(Buffer); 10813 for (const auto &ParamAttr : ParamAttrs) { 10814 switch (ParamAttr.Kind) { 10815 case LinearWithVarStride: 10816 Out << "ls" << ParamAttr.StrideOrArg; 10817 break; 10818 case Linear: 10819 Out << 'l'; 10820 // Don't print the step value if it is not present or if it is 10821 // equal to 1. 10822 if (ParamAttr.StrideOrArg != 1) 10823 Out << ParamAttr.StrideOrArg; 10824 break; 10825 case Uniform: 10826 Out << 'u'; 10827 break; 10828 case Vector: 10829 Out << 'v'; 10830 break; 10831 } 10832 10833 if (!!ParamAttr.Alignment) 10834 Out << 'a' << ParamAttr.Alignment; 10835 } 10836 10837 return std::string(Out.str()); 10838 } 10839 10840 // Function used to add the attribute. The parameter `VLEN` is 10841 // templated to allow the use of "x" when targeting scalable functions 10842 // for SVE. 
10843 template <typename T> 10844 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10845 char ISA, StringRef ParSeq, 10846 StringRef MangledName, bool OutputBecomesInput, 10847 llvm::Function *Fn) { 10848 SmallString<256> Buffer; 10849 llvm::raw_svector_ostream Out(Buffer); 10850 Out << Prefix << ISA << LMask << VLEN; 10851 if (OutputBecomesInput) 10852 Out << "v"; 10853 Out << ParSeq << "_" << MangledName; 10854 Fn->addFnAttr(Out.str()); 10855 } 10856 10857 // Helper function to generate the Advanced SIMD names depending on 10858 // the value of the NDS when simdlen is not present. 10859 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10860 StringRef Prefix, char ISA, 10861 StringRef ParSeq, StringRef MangledName, 10862 bool OutputBecomesInput, 10863 llvm::Function *Fn) { 10864 switch (NDS) { 10865 case 8: 10866 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10867 OutputBecomesInput, Fn); 10868 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10869 OutputBecomesInput, Fn); 10870 break; 10871 case 16: 10872 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10873 OutputBecomesInput, Fn); 10874 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10875 OutputBecomesInput, Fn); 10876 break; 10877 case 32: 10878 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10879 OutputBecomesInput, Fn); 10880 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10881 OutputBecomesInput, Fn); 10882 break; 10883 case 64: 10884 case 128: 10885 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10886 OutputBecomesInput, Fn); 10887 break; 10888 default: 10889 llvm_unreachable("Scalar type is too wide."); 10890 } 10891 } 10892 10893 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10894 static void emitAArch64DeclareSimdFunction( 10895 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10896 ArrayRef<ParamAttrTy> ParamAttrs, 10897 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10898 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10899 10900 // Get basic data for building the vector signature. 10901 const auto Data = getNDSWDS(FD, ParamAttrs); 10902 const unsigned NDS = std::get<0>(Data); 10903 const unsigned WDS = std::get<1>(Data); 10904 const bool OutputBecomesInput = std::get<2>(Data); 10905 10906 // Check the values provided via `simdlen` by the user. 10907 // 1. A `simdlen(1)` doesn't produce vector signatures, 10908 if (UserVLEN == 1) { 10909 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10910 DiagnosticsEngine::Warning, 10911 "The clause simdlen(1) has no effect when targeting aarch64."); 10912 CGM.getDiags().Report(SLoc, DiagID); 10913 return; 10914 } 10915 10916 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10917 // Advanced SIMD output. 10918 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10919 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10920 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10921 "power of 2 when targeting Advanced SIMD."); 10922 CGM.getDiags().Report(SLoc, DiagID); 10923 return; 10924 } 10925 10926 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10927 // limits. 10928 if (ISA == 's' && UserVLEN != 0) { 10929 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10930 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10931 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10932 "lanes in the architectural constraints " 10933 "for SVE (min is 128-bit, max is " 10934 "2048-bit, by steps of 128-bit)"); 10935 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10936 return; 10937 } 10938 } 10939 10940 // Sort out parameter sequence. 
10941 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10942 StringRef Prefix = "_ZGV"; 10943 // Generate simdlen from user input (if any). 10944 if (UserVLEN) { 10945 if (ISA == 's') { 10946 // SVE generates only a masked function. 10947 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10948 OutputBecomesInput, Fn); 10949 } else { 10950 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10951 // Advanced SIMD generates one or two functions, depending on 10952 // the `[not]inbranch` clause. 10953 switch (State) { 10954 case OMPDeclareSimdDeclAttr::BS_Undefined: 10955 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10956 OutputBecomesInput, Fn); 10957 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10958 OutputBecomesInput, Fn); 10959 break; 10960 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10961 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10962 OutputBecomesInput, Fn); 10963 break; 10964 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10965 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10966 OutputBecomesInput, Fn); 10967 break; 10968 } 10969 } 10970 } else { 10971 // If no user simdlen is provided, follow the AAVFABI rules for 10972 // generating the vector length. 10973 if (ISA == 's') { 10974 // SVE, section 3.4.1, item 1. 10975 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10976 OutputBecomesInput, Fn); 10977 } else { 10978 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10979 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10980 // two vector names depending on the use of the clause 10981 // `[not]inbranch`. 
10982 switch (State) { 10983 case OMPDeclareSimdDeclAttr::BS_Undefined: 10984 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10985 OutputBecomesInput, Fn); 10986 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10987 OutputBecomesInput, Fn); 10988 break; 10989 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10990 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10991 OutputBecomesInput, Fn); 10992 break; 10993 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10994 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10995 OutputBecomesInput, Fn); 10996 break; 10997 } 10998 } 10999 } 11000 } 11001 11002 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11003 llvm::Function *Fn) { 11004 ASTContext &C = CGM.getContext(); 11005 FD = FD->getMostRecentDecl(); 11006 // Map params to their positions in function decl. 11007 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11008 if (isa<CXXMethodDecl>(FD)) 11009 ParamPositions.try_emplace(FD, 0); 11010 unsigned ParamPos = ParamPositions.size(); 11011 for (const ParmVarDecl *P : FD->parameters()) { 11012 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11013 ++ParamPos; 11014 } 11015 while (FD) { 11016 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11017 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11018 // Mark uniform parameters. 11019 for (const Expr *E : Attr->uniforms()) { 11020 E = E->IgnoreParenImpCasts(); 11021 unsigned Pos; 11022 if (isa<CXXThisExpr>(E)) { 11023 Pos = ParamPositions[FD]; 11024 } else { 11025 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11026 ->getCanonicalDecl(); 11027 Pos = ParamPositions[PVD]; 11028 } 11029 ParamAttrs[Pos].Kind = Uniform; 11030 } 11031 // Get alignment info. 
11032 auto NI = Attr->alignments_begin(); 11033 for (const Expr *E : Attr->aligneds()) { 11034 E = E->IgnoreParenImpCasts(); 11035 unsigned Pos; 11036 QualType ParmTy; 11037 if (isa<CXXThisExpr>(E)) { 11038 Pos = ParamPositions[FD]; 11039 ParmTy = E->getType(); 11040 } else { 11041 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11042 ->getCanonicalDecl(); 11043 Pos = ParamPositions[PVD]; 11044 ParmTy = PVD->getType(); 11045 } 11046 ParamAttrs[Pos].Alignment = 11047 (*NI) 11048 ? (*NI)->EvaluateKnownConstInt(C) 11049 : llvm::APSInt::getUnsigned( 11050 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11051 .getQuantity()); 11052 ++NI; 11053 } 11054 // Mark linear parameters. 11055 auto SI = Attr->steps_begin(); 11056 auto MI = Attr->modifiers_begin(); 11057 for (const Expr *E : Attr->linears()) { 11058 E = E->IgnoreParenImpCasts(); 11059 unsigned Pos; 11060 // Rescaling factor needed to compute the linear parameter 11061 // value in the mangled name. 11062 unsigned PtrRescalingFactor = 1; 11063 if (isa<CXXThisExpr>(E)) { 11064 Pos = ParamPositions[FD]; 11065 } else { 11066 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11067 ->getCanonicalDecl(); 11068 Pos = ParamPositions[PVD]; 11069 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11070 PtrRescalingFactor = CGM.getContext() 11071 .getTypeSizeInChars(P->getPointeeType()) 11072 .getQuantity(); 11073 } 11074 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11075 ParamAttr.Kind = Linear; 11076 // Assuming a stride of 1, for `linear` without modifiers. 
11077 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11078 if (*SI) { 11079 Expr::EvalResult Result; 11080 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11081 if (const auto *DRE = 11082 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11083 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11084 ParamAttr.Kind = LinearWithVarStride; 11085 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11086 ParamPositions[StridePVD->getCanonicalDecl()]); 11087 } 11088 } 11089 } else { 11090 ParamAttr.StrideOrArg = Result.Val.getInt(); 11091 } 11092 } 11093 // If we are using a linear clause on a pointer, we need to 11094 // rescale the value of linear_step with the byte size of the 11095 // pointee type. 11096 if (Linear == ParamAttr.Kind) 11097 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11098 ++SI; 11099 ++MI; 11100 } 11101 llvm::APSInt VLENVal; 11102 SourceLocation ExprLoc; 11103 const Expr *VLENExpr = Attr->getSimdlen(); 11104 if (VLENExpr) { 11105 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11106 ExprLoc = VLENExpr->getExprLoc(); 11107 } 11108 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11109 if (CGM.getTriple().isX86()) { 11110 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11111 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11112 unsigned VLEN = VLENVal.getExtValue(); 11113 StringRef MangledName = Fn->getName(); 11114 if (CGM.getTarget().hasFeature("sve")) 11115 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11116 MangledName, 's', 128, Fn, ExprLoc); 11117 if (CGM.getTarget().hasFeature("neon")) 11118 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11119 MangledName, 'n', 128, Fn, ExprLoc); 11120 } 11121 } 11122 FD = FD->getPreviousDecl(); 11123 } 11124 } 11125 11126 namespace { 11127 /// Cleanup action for doacross support. 
11128 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11129 public: 11130 static const int DoacrossFinArgs = 2; 11131 11132 private: 11133 llvm::FunctionCallee RTLFn; 11134 llvm::Value *Args[DoacrossFinArgs]; 11135 11136 public: 11137 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11138 ArrayRef<llvm::Value *> CallArgs) 11139 : RTLFn(RTLFn) { 11140 assert(CallArgs.size() == DoacrossFinArgs); 11141 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11142 } 11143 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11144 if (!CGF.HaveInsertPoint()) 11145 return; 11146 CGF.EmitRuntimeCall(RTLFn, Args); 11147 } 11148 }; 11149 } // namespace 11150 11151 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11152 const OMPLoopDirective &D, 11153 ArrayRef<Expr *> NumIterations) { 11154 if (!CGF.HaveInsertPoint()) 11155 return; 11156 11157 ASTContext &C = CGM.getContext(); 11158 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11159 RecordDecl *RD; 11160 if (KmpDimTy.isNull()) { 11161 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11162 // kmp_int64 lo; // lower 11163 // kmp_int64 up; // upper 11164 // kmp_int64 st; // stride 11165 // }; 11166 RD = C.buildImplicitRecord("kmp_dim"); 11167 RD->startDefinition(); 11168 addFieldToRecordDecl(C, RD, Int64Ty); 11169 addFieldToRecordDecl(C, RD, Int64Ty); 11170 addFieldToRecordDecl(C, RD, Int64Ty); 11171 RD->completeDefinition(); 11172 KmpDimTy = C.getRecordType(RD); 11173 } else { 11174 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11175 } 11176 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11177 QualType ArrayTy = 11178 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11179 11180 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11181 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11182 enum { LowerFD = 0, UpperFD, StrideFD }; 11183 // Fill dims with data. 
11184 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11185 LValue DimsLVal = CGF.MakeAddrLValue( 11186 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11187 // dims.upper = num_iterations; 11188 LValue UpperLVal = CGF.EmitLValueForField( 11189 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11190 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11191 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11192 Int64Ty, NumIterations[I]->getExprLoc()); 11193 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11194 // dims.stride = 1; 11195 LValue StrideLVal = CGF.EmitLValueForField( 11196 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11197 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11198 StrideLVal); 11199 } 11200 11201 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11202 // kmp_int32 num_dims, struct kmp_dim * dims); 11203 llvm::Value *Args[] = { 11204 emitUpdateLocation(CGF, D.getBeginLoc()), 11205 getThreadID(CGF, D.getBeginLoc()), 11206 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11207 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11208 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11209 CGM.VoidPtrTy)}; 11210 11211 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11212 CGM.getModule(), OMPRTL___kmpc_doacross_init); 11213 CGF.EmitRuntimeCall(RTLFn, Args); 11214 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11215 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11216 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11217 CGM.getModule(), OMPRTL___kmpc_doacross_fini); 11218 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11219 llvm::makeArrayRef(FiniArgs)); 11220 } 11221 11222 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11223 const OMPDependClause *C) { 11224 QualType Int64Ty = 11225 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11226 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11227 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11228 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11229 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11230 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11231 const Expr *CounterVal = C->getLoopData(I); 11232 assert(CounterVal); 11233 llvm::Value *CntVal = CGF.EmitScalarConversion( 11234 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11235 CounterVal->getExprLoc()); 11236 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11237 /*Volatile=*/false, Int64Ty); 11238 } 11239 llvm::Value *Args[] = { 11240 emitUpdateLocation(CGF, C->getBeginLoc()), 11241 getThreadID(CGF, C->getBeginLoc()), 11242 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11243 llvm::FunctionCallee RTLFn; 11244 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11245 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11246 OMPRTL___kmpc_doacross_post); 11247 } else { 11248 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11249 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 11250 OMPRTL___kmpc_doacross_wait); 11251 } 11252 CGF.EmitRuntimeCall(RTLFn, Args); 11253 } 11254 11255 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11256 llvm::FunctionCallee Callee, 11257 ArrayRef<llvm::Value *> Args) const { 11258 assert(Loc.isValid() && "Outlined function call location must be valid."); 11259 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11260 11261 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11262 if (Fn->doesNotThrow()) { 11263 CGF.EmitNounwindRuntimeCall(Fn, Args); 11264 return; 11265 } 11266 } 11267 CGF.EmitRuntimeCall(Callee, Args); 11268 } 11269 11270 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11271 CodeGenFunction &CGF, SourceLocation 
Loc, llvm::FunctionCallee OutlinedFn, 11272 ArrayRef<llvm::Value *> Args) const { 11273 emitCall(CGF, Loc, OutlinedFn, Args); 11274 } 11275 11276 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11277 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11278 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11279 HasEmittedDeclareTargetRegion = true; 11280 } 11281 11282 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11283 const VarDecl *NativeParam, 11284 const VarDecl *TargetParam) const { 11285 return CGF.GetAddrOfLocalVar(NativeParam); 11286 } 11287 11288 namespace { 11289 /// Cleanup action for allocate support. 11290 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11291 public: 11292 static const int CleanupArgs = 3; 11293 11294 private: 11295 llvm::FunctionCallee RTLFn; 11296 llvm::Value *Args[CleanupArgs]; 11297 11298 public: 11299 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11300 ArrayRef<llvm::Value *> CallArgs) 11301 : RTLFn(RTLFn) { 11302 assert(CallArgs.size() == CleanupArgs && 11303 "Size of arguments does not match."); 11304 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11305 } 11306 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11307 if (!CGF.HaveInsertPoint()) 11308 return; 11309 CGF.EmitRuntimeCall(RTLFn, Args); 11310 } 11311 }; 11312 } // namespace 11313 11314 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11315 const VarDecl *VD) { 11316 if (!VD) 11317 return Address::invalid(); 11318 const VarDecl *CVD = VD->getCanonicalDecl(); 11319 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11320 return Address::invalid(); 11321 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11322 // Use the default allocation. 
11323 if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 11324 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 11325 !AA->getAllocator()) 11326 return Address::invalid(); 11327 llvm::Value *Size; 11328 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11329 if (CVD->getType()->isVariablyModifiedType()) { 11330 Size = CGF.getTypeSize(CVD->getType()); 11331 // Align the size: ((size + align - 1) / align) * align 11332 Size = CGF.Builder.CreateNUWAdd( 11333 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11334 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11335 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11336 } else { 11337 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11338 Size = CGM.getSize(Sz.alignTo(Align)); 11339 } 11340 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11341 assert(AA->getAllocator() && 11342 "Expected allocator expression for non-default allocator."); 11343 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11344 // According to the standard, the original allocator type is a enum (integer). 11345 // Convert to pointer type, if required. 
11346 if (Allocator->getType()->isIntegerTy()) 11347 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11348 else if (Allocator->getType()->isPointerTy()) 11349 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11350 CGM.VoidPtrTy); 11351 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11352 11353 llvm::Value *Addr = 11354 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 11355 CGM.getModule(), OMPRTL___kmpc_alloc), 11356 Args, getName({CVD->getName(), ".void.addr"})); 11357 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11358 Allocator}; 11359 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( 11360 CGM.getModule(), OMPRTL___kmpc_free); 11361 11362 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11363 llvm::makeArrayRef(FiniArgs)); 11364 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11365 Addr, 11366 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11367 getName({CVD->getName(), ".addr"})); 11368 return Address(Addr, Align); 11369 } 11370 11371 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11372 CodeGenModule &CGM, const OMPLoopDirective &S) 11373 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11374 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11375 if (!NeedToPush) 11376 return; 11377 NontemporalDeclsSet &DS = 11378 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11379 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11380 for (const Stmt *Ref : C->private_refs()) { 11381 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11382 const ValueDecl *VD; 11383 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11384 VD = DRE->getDecl(); 11385 } else { 11386 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11387 assert((ME->isImplicitCXXThis() || 11388 
isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11389 "Expected member of current class."); 11390 VD = ME->getMemberDecl(); 11391 } 11392 DS.insert(VD); 11393 } 11394 } 11395 } 11396 11397 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11398 if (!NeedToPush) 11399 return; 11400 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11401 } 11402 11403 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11404 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11405 11406 return llvm::any_of( 11407 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11408 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11409 } 11410 11411 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11412 const OMPExecutableDirective &S, 11413 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11414 const { 11415 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11416 // Vars in target/task regions must be excluded completely. 11417 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11418 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11419 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11420 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11421 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11422 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11423 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11424 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11425 } 11426 } 11427 // Exclude vars in private clauses. 
11428 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11429 for (const Expr *Ref : C->varlists()) { 11430 if (!Ref->getType()->isScalarType()) 11431 continue; 11432 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11433 if (!DRE) 11434 continue; 11435 NeedToCheckForLPCs.insert(DRE->getDecl()); 11436 } 11437 } 11438 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11439 for (const Expr *Ref : C->varlists()) { 11440 if (!Ref->getType()->isScalarType()) 11441 continue; 11442 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11443 if (!DRE) 11444 continue; 11445 NeedToCheckForLPCs.insert(DRE->getDecl()); 11446 } 11447 } 11448 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11449 for (const Expr *Ref : C->varlists()) { 11450 if (!Ref->getType()->isScalarType()) 11451 continue; 11452 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11453 if (!DRE) 11454 continue; 11455 NeedToCheckForLPCs.insert(DRE->getDecl()); 11456 } 11457 } 11458 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11459 for (const Expr *Ref : C->varlists()) { 11460 if (!Ref->getType()->isScalarType()) 11461 continue; 11462 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11463 if (!DRE) 11464 continue; 11465 NeedToCheckForLPCs.insert(DRE->getDecl()); 11466 } 11467 } 11468 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11469 for (const Expr *Ref : C->varlists()) { 11470 if (!Ref->getType()->isScalarType()) 11471 continue; 11472 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11473 if (!DRE) 11474 continue; 11475 NeedToCheckForLPCs.insert(DRE->getDecl()); 11476 } 11477 } 11478 for (const Decl *VD : NeedToCheckForLPCs) { 11479 for (const LastprivateConditionalData &Data : 11480 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11481 if (Data.DeclToUniqueName.count(VD) > 0) { 11482 if (!Data.Disabled) 11483 
NeedToAddForLPCsAsDisabled.insert(VD); 11484 break; 11485 } 11486 } 11487 } 11488 } 11489 11490 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11491 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11492 : CGM(CGF.CGM), 11493 Action((CGM.getLangOpts().OpenMP >= 50 && 11494 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11495 [](const OMPLastprivateClause *C) { 11496 return C->getKind() == 11497 OMPC_LASTPRIVATE_conditional; 11498 })) 11499 ? ActionToDo::PushAsLastprivateConditional 11500 : ActionToDo::DoNotPush) { 11501 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11502 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11503 return; 11504 assert(Action == ActionToDo::PushAsLastprivateConditional && 11505 "Expected a push action."); 11506 LastprivateConditionalData &Data = 11507 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11508 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11509 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11510 continue; 11511 11512 for (const Expr *Ref : C->varlists()) { 11513 Data.DeclToUniqueName.insert(std::make_pair( 11514 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11515 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11516 } 11517 } 11518 Data.IVLVal = IVLVal; 11519 Data.Fn = CGF.CurFn; 11520 } 11521 11522 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11523 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11524 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11525 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11526 if (CGM.getLangOpts().OpenMP < 50) 11527 return; 11528 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11529 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11530 if (!NeedToAddForLPCsAsDisabled.empty()) { 11531 Action = ActionToDo::DisableLastprivateConditional; 11532 
LastprivateConditionalData &Data = 11533 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11534 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11535 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11536 Data.Fn = CGF.CurFn; 11537 Data.Disabled = true; 11538 } 11539 } 11540 11541 CGOpenMPRuntime::LastprivateConditionalRAII 11542 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11543 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11544 return LastprivateConditionalRAII(CGF, S); 11545 } 11546 11547 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11548 if (CGM.getLangOpts().OpenMP < 50) 11549 return; 11550 if (Action == ActionToDo::DisableLastprivateConditional) { 11551 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11552 "Expected list of disabled private vars."); 11553 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11554 } 11555 if (Action == ActionToDo::PushAsLastprivateConditional) { 11556 assert( 11557 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11558 "Expected list of lastprivate conditional vars."); 11559 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11560 } 11561 } 11562 11563 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11564 const VarDecl *VD) { 11565 ASTContext &C = CGM.getContext(); 11566 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11567 if (I == LastprivateConditionalToTypes.end()) 11568 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11569 QualType NewType; 11570 const FieldDecl *VDField; 11571 const FieldDecl *FiredField; 11572 LValue BaseLVal; 11573 auto VI = I->getSecond().find(VD); 11574 if (VI == I->getSecond().end()) { 11575 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 11576 RD->startDefinition(); 11577 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11578 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 11579 RD->completeDefinition(); 11580 NewType = C.getRecordType(RD); 11581 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11582 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11583 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11584 } else { 11585 NewType = std::get<0>(VI->getSecond()); 11586 VDField = std::get<1>(VI->getSecond()); 11587 FiredField = std::get<2>(VI->getSecond()); 11588 BaseLVal = std::get<3>(VI->getSecond()); 11589 } 11590 LValue FiredLVal = 11591 CGF.EmitLValueForField(BaseLVal, FiredField); 11592 CGF.EmitStoreOfScalar( 11593 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11594 FiredLVal); 11595 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11596 } 11597 11598 namespace { 11599 /// Checks if the lastprivate conditional variable is referenced in LHS. 11600 class LastprivateConditionalRefChecker final 11601 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11602 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11603 const Expr *FoundE = nullptr; 11604 const Decl *FoundD = nullptr; 11605 StringRef UniqueDeclName; 11606 LValue IVLVal; 11607 llvm::Function *FoundFn = nullptr; 11608 SourceLocation Loc; 11609 11610 public: 11611 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11612 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11613 llvm::reverse(LPM)) { 11614 auto It = D.DeclToUniqueName.find(E->getDecl()); 11615 if (It == D.DeclToUniqueName.end()) 11616 continue; 11617 if (D.Disabled) 11618 return false; 11619 FoundE = E; 11620 FoundD = E->getDecl()->getCanonicalDecl(); 11621 UniqueDeclName = It->second; 11622 IVLVal = D.IVLVal; 11623 FoundFn = D.Fn; 11624 break; 11625 } 11626 return FoundE == E; 11627 } 11628 bool VisitMemberExpr(const MemberExpr *E) { 11629 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11630 return false; 11631 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 11632 llvm::reverse(LPM)) { 11633 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11634 if (It == D.DeclToUniqueName.end()) 11635 continue; 11636 if (D.Disabled) 11637 return false; 11638 FoundE = E; 11639 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11640 UniqueDeclName = It->second; 11641 IVLVal = D.IVLVal; 11642 FoundFn = D.Fn; 11643 break; 11644 } 11645 return FoundE == E; 11646 } 11647 bool VisitStmt(const Stmt *S) { 11648 for (const Stmt *Child : S->children()) { 11649 if (!Child) 11650 continue; 11651 if (const auto *E = dyn_cast<Expr>(Child)) 11652 if (!E->isGLValue()) 11653 continue; 11654 if (Visit(Child)) 11655 return true; 11656 } 11657 return false; 11658 } 11659 explicit LastprivateConditionalRefChecker( 11660 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11661 : LPM(LPM) {} 11662 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11663 getFoundData() const { 11664 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11665 } 11666 }; 11667 } // namespace 11668 11669 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11670 LValue IVLVal, 11671 StringRef UniqueDeclName, 11672 LValue LVal, 11673 SourceLocation Loc) { 11674 // Last updated loop counter for the lastprivate conditional var. 11675 // int<xx> last_iv = 0; 11676 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11677 llvm::Constant *LastIV = 11678 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 11679 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11680 IVLVal.getAlignment().getAsAlign()); 11681 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11682 11683 // Last value of the lastprivate conditional. 
11684 // decltype(priv_a) last_a; 11685 llvm::Constant *Last = getOrCreateInternalVariable( 11686 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 11687 cast<llvm::GlobalVariable>(Last)->setAlignment( 11688 LVal.getAlignment().getAsAlign()); 11689 LValue LastLVal = 11690 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 11691 11692 // Global loop counter. Required to handle inner parallel-for regions. 11693 // iv 11694 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 11695 11696 // #pragma omp critical(a) 11697 // if (last_iv <= iv) { 11698 // last_iv = iv; 11699 // last_a = priv_a; 11700 // } 11701 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 11702 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 11703 Action.Enter(CGF); 11704 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 11705 // (last_iv <= iv) ? Check if the variable is updated and store new 11706 // value in global var. 11707 llvm::Value *CmpRes; 11708 if (IVLVal.getType()->isSignedIntegerType()) { 11709 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 11710 } else { 11711 assert(IVLVal.getType()->isUnsignedIntegerType() && 11712 "Loop iteration variable must be integer."); 11713 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 11714 } 11715 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 11716 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 11717 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 11718 // { 11719 CGF.EmitBlock(ThenBB); 11720 11721 // last_iv = iv; 11722 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 11723 11724 // last_a = priv_a; 11725 switch (CGF.getEvaluationKind(LVal.getType())) { 11726 case TEK_Scalar: { 11727 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 11728 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 11729 break; 11730 } 11731 case TEK_Complex: { 11732 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 11733 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 11734 break; 11735 } 11736 case TEK_Aggregate: 11737 llvm_unreachable( 11738 "Aggregates are not supported in lastprivate conditional."); 11739 } 11740 // } 11741 CGF.EmitBranch(ExitBB); 11742 // There is no need to emit line number for unconditional branch. 11743 (void)ApplyDebugLocation::CreateEmpty(CGF); 11744 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 11745 }; 11746 11747 if (CGM.getLangOpts().OpenMPSimd) { 11748 // Do not emit as a critical region as no parallel region could be emitted. 11749 RegionCodeGenTy ThenRCG(CodeGen); 11750 ThenRCG(CGF); 11751 } else { 11752 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 11753 } 11754 } 11755 11756 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 11757 const Expr *LHS) { 11758 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11759 return; 11760 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 11761 if (!Checker.Visit(LHS)) 11762 return; 11763 const Expr *FoundE; 11764 const Decl *FoundD; 11765 StringRef UniqueDeclName; 11766 LValue IVLVal; 11767 llvm::Function *FoundFn; 11768 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 11769 Checker.getFoundData(); 11770 if (FoundFn != CGF.CurFn) { 11771 // Special codegen for inner parallel regions. 
11772 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 11773 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 11774 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 11775 "Lastprivate conditional is not found in outer region."); 11776 QualType StructTy = std::get<0>(It->getSecond()); 11777 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 11778 LValue PrivLVal = CGF.EmitLValue(FoundE); 11779 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11780 PrivLVal.getAddress(CGF), 11781 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 11782 LValue BaseLVal = 11783 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 11784 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 11785 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 11786 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 11787 FiredLVal, llvm::AtomicOrdering::Unordered, 11788 /*IsVolatile=*/true, /*isInit=*/false); 11789 return; 11790 } 11791 11792 // Private address of the lastprivate conditional in the current context. 
11793 // priv_a 11794 LValue LVal = CGF.EmitLValue(FoundE); 11795 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 11796 FoundE->getExprLoc()); 11797 } 11798 11799 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 11800 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11801 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 11802 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11803 return; 11804 auto Range = llvm::reverse(LastprivateConditionalStack); 11805 auto It = llvm::find_if( 11806 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 11807 if (It == Range.end() || It->Fn != CGF.CurFn) 11808 return; 11809 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 11810 assert(LPCI != LastprivateConditionalToTypes.end() && 11811 "Lastprivates must be registered already."); 11812 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11813 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 11814 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 11815 for (const auto &Pair : It->DeclToUniqueName) { 11816 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 11817 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 11818 continue; 11819 auto I = LPCI->getSecond().find(Pair.first); 11820 assert(I != LPCI->getSecond().end() && 11821 "Lastprivate must be rehistered already."); 11822 // bool Cmp = priv_a.Fired != 0; 11823 LValue BaseLVal = std::get<3>(I->getSecond()); 11824 LValue FiredLVal = 11825 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 11826 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 11827 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 11828 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 11829 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 11830 // if (Cmp) { 11831 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 11832 CGF.EmitBlock(ThenBB); 
11833 Address Addr = CGF.GetAddrOfLocalVar(VD); 11834 LValue LVal; 11835 if (VD->getType()->isReferenceType()) 11836 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 11837 AlignmentSource::Decl); 11838 else 11839 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 11840 AlignmentSource::Decl); 11841 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 11842 D.getBeginLoc()); 11843 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 11844 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 11845 // } 11846 } 11847 } 11848 11849 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11850 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11851 SourceLocation Loc) { 11852 if (CGF.getLangOpts().OpenMP < 50) 11853 return; 11854 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 11855 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 11856 "Unknown lastprivate conditional variable."); 11857 StringRef UniqueName = It->second; 11858 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11859 // The variable was not updated in the region - exit. 
11860 if (!GV) 11861 return; 11862 LValue LPLVal = CGF.MakeAddrLValue( 11863 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 11864 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 11865 CGF.EmitStoreOfScalar(Res, PrivLVal); 11866 } 11867 11868 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11869 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11870 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11871 llvm_unreachable("Not supported in SIMD-only mode"); 11872 } 11873 11874 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11875 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11876 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11877 llvm_unreachable("Not supported in SIMD-only mode"); 11878 } 11879 11880 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11881 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11882 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11883 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11884 bool Tied, unsigned &NumberOfParts) { 11885 llvm_unreachable("Not supported in SIMD-only mode"); 11886 } 11887 11888 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11889 SourceLocation Loc, 11890 llvm::Function *OutlinedFn, 11891 ArrayRef<llvm::Value *> CapturedVars, 11892 const Expr *IfCond) { 11893 llvm_unreachable("Not supported in SIMD-only mode"); 11894 } 11895 11896 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11897 CodeGenFunction &CGF, StringRef CriticalName, 11898 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11899 const Expr *Hint) { 11900 llvm_unreachable("Not supported in SIMD-only mode"); 11901 } 11902 11903 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11904 const RegionCodeGenTy &MasterOpGen, 11905 SourceLocation Loc) { 11906 llvm_unreachable("Not supported in SIMD-only mode"); 11907 } 11908 11909 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11910 SourceLocation Loc) { 11911 llvm_unreachable("Not supported in SIMD-only mode"); 11912 } 11913 11914 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11915 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11916 SourceLocation Loc) { 11917 llvm_unreachable("Not supported in SIMD-only mode"); 11918 } 11919 11920 void CGOpenMPSIMDRuntime::emitSingleRegion( 11921 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11922 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11923 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11924 ArrayRef<const Expr *> AssignmentOps) { 11925 llvm_unreachable("Not supported in SIMD-only mode"); 11926 } 11927 11928 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11929 const RegionCodeGenTy &OrderedOpGen, 11930 SourceLocation Loc, 11931 bool IsThreads) { 11932 llvm_unreachable("Not supported in SIMD-only mode"); 11933 } 11934 11935 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11936 SourceLocation Loc, 11937 OpenMPDirectiveKind Kind, 11938 bool EmitChecks, 11939 bool ForceSimpleCall) { 11940 llvm_unreachable("Not supported in SIMD-only mode"); 11941 } 11942 11943 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11944 CodeGenFunction &CGF, SourceLocation Loc, 11945 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11946 bool Ordered, const DispatchRTInput &DispatchValues) { 11947 llvm_unreachable("Not supported in SIMD-only mode"); 11948 } 11949 11950 void CGOpenMPSIMDRuntime::emitForStaticInit( 11951 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11952 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11953 llvm_unreachable("Not supported in SIMD-only mode"); 11954 } 11955 11956 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11957 CodeGenFunction &CGF, SourceLocation Loc, 11958 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 11959 llvm_unreachable("Not supported in SIMD-only mode"); 11960 } 11961 11962 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11963 SourceLocation Loc, 11964 unsigned IVSize, 11965 bool IVSigned) { 11966 llvm_unreachable("Not supported in SIMD-only mode"); 11967 } 11968 11969 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11970 SourceLocation Loc, 11971 OpenMPDirectiveKind DKind) { 11972 llvm_unreachable("Not supported in SIMD-only mode"); 11973 } 11974 11975 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11976 SourceLocation Loc, 11977 unsigned IVSize, bool IVSigned, 11978 Address IL, Address LB, 11979 Address UB, Address ST) { 11980 llvm_unreachable("Not supported in SIMD-only mode"); 11981 } 11982 11983 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11984 llvm::Value *NumThreads, 11985 SourceLocation Loc) { 11986 llvm_unreachable("Not supported in SIMD-only mode"); 11987 } 11988 11989 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11990 ProcBindKind ProcBind, 11991 SourceLocation Loc) { 11992 llvm_unreachable("Not supported in SIMD-only mode"); 11993 } 11994 11995 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11996 const VarDecl *VD, 11997 Address VDAddr, 11998 SourceLocation Loc) { 11999 llvm_unreachable("Not supported in SIMD-only mode"); 12000 } 12001 12002 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12003 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12004 CodeGenFunction *CGF) { 12005 llvm_unreachable("Not supported in SIMD-only mode"); 12006 } 12007 12008 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12009 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12010 llvm_unreachable("Not supported in SIMD-only mode"); 12011 } 12012 12013 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12014 ArrayRef<const Expr *> 
Vars, 12015 SourceLocation Loc, 12016 llvm::AtomicOrdering AO) { 12017 llvm_unreachable("Not supported in SIMD-only mode"); 12018 } 12019 12020 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12021 const OMPExecutableDirective &D, 12022 llvm::Function *TaskFunction, 12023 QualType SharedsTy, Address Shareds, 12024 const Expr *IfCond, 12025 const OMPTaskDataTy &Data) { 12026 llvm_unreachable("Not supported in SIMD-only mode"); 12027 } 12028 12029 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12030 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12031 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12032 const Expr *IfCond, const OMPTaskDataTy &Data) { 12033 llvm_unreachable("Not supported in SIMD-only mode"); 12034 } 12035 12036 void CGOpenMPSIMDRuntime::emitReduction( 12037 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12038 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12039 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12040 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12041 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12042 ReductionOps, Options); 12043 } 12044 12045 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12046 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12047 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12048 llvm_unreachable("Not supported in SIMD-only mode"); 12049 } 12050 12051 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12052 SourceLocation Loc, 12053 bool IsWorksharingReduction) { 12054 llvm_unreachable("Not supported in SIMD-only mode"); 12055 } 12056 12057 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12058 SourceLocation Loc, 12059 ReductionCodeGen &RCG, 12060 unsigned N) { 12061 llvm_unreachable("Not supported in SIMD-only mode"); 12062 } 12063 
12064 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12065 SourceLocation Loc, 12066 llvm::Value *ReductionsPtr, 12067 LValue SharedLVal) { 12068 llvm_unreachable("Not supported in SIMD-only mode"); 12069 } 12070 12071 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12072 SourceLocation Loc) { 12073 llvm_unreachable("Not supported in SIMD-only mode"); 12074 } 12075 12076 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12077 CodeGenFunction &CGF, SourceLocation Loc, 12078 OpenMPDirectiveKind CancelRegion) { 12079 llvm_unreachable("Not supported in SIMD-only mode"); 12080 } 12081 12082 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12083 SourceLocation Loc, const Expr *IfCond, 12084 OpenMPDirectiveKind CancelRegion) { 12085 llvm_unreachable("Not supported in SIMD-only mode"); 12086 } 12087 12088 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12089 const OMPExecutableDirective &D, StringRef ParentName, 12090 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12091 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12092 llvm_unreachable("Not supported in SIMD-only mode"); 12093 } 12094 12095 void CGOpenMPSIMDRuntime::emitTargetCall( 12096 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12097 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12098 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12099 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12100 const OMPLoopDirective &D)> 12101 SizeEmitter) { 12102 llvm_unreachable("Not supported in SIMD-only mode"); 12103 } 12104 12105 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12106 llvm_unreachable("Not supported in SIMD-only mode"); 12107 } 12108 12109 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12110 llvm_unreachable("Not supported in SIMD-only mode"); 12111 } 12112 12113 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12114 return false; 12115 } 12116 12117 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12118 const OMPExecutableDirective &D, 12119 SourceLocation Loc, 12120 llvm::Function *OutlinedFn, 12121 ArrayRef<llvm::Value *> CapturedVars) { 12122 llvm_unreachable("Not supported in SIMD-only mode"); 12123 } 12124 12125 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12126 const Expr *NumTeams, 12127 const Expr *ThreadLimit, 12128 SourceLocation Loc) { 12129 llvm_unreachable("Not supported in SIMD-only mode"); 12130 } 12131 12132 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12133 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12134 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12135 llvm_unreachable("Not supported in SIMD-only mode"); 12136 } 12137 12138 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12139 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12140 const Expr *Device) { 12141 llvm_unreachable("Not supported in SIMD-only mode"); 12142 } 12143 12144 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12145 const OMPLoopDirective &D, 12146 ArrayRef<Expr *> NumIterations) { 12147 llvm_unreachable("Not supported in SIMD-only mode"); 12148 } 12149 12150 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12151 const OMPDependClause *C) { 12152 llvm_unreachable("Not supported in SIMD-only mode"); 12153 } 12154 12155 const VarDecl * 12156 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12157 const VarDecl *NativeParam) const { 12158 llvm_unreachable("Not supported in SIMD-only mode"); 12159 } 12160 12161 Address 12162 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12163 const VarDecl *NativeParam, 12164 const VarDecl *TargetParam) const { 12165 llvm_unreachable("Not supported in SIMD-only mode"); 12166 } 12167