1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/IR/Constants.h" 33 #include "llvm/IR/DerivedTypes.h" 34 #include "llvm/IR/GlobalValue.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/Support/AtomicOrdering.h" 37 #include "llvm/Support/Format.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cassert> 40 #include <numeric> 41 42 using namespace clang; 43 using namespace CodeGen; 44 using namespace llvm::omp; 45 46 namespace { 47 /// Base class for handling code generation inside OpenMP regions. 48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 49 public: 50 /// Kinds of OpenMP regions used in codegen. 51 enum CGOpenMPRegionKind { 52 /// Region with outlined function for standalone 'parallel' 53 /// directive. 
54 ParallelOutlinedRegion, 55 /// Region with outlined function for standalone 'task' directive. 56 TaskOutlinedRegion, 57 /// Region for constructs that do not require function outlining, 58 /// like 'for', 'sections', 'atomic' etc. directives. 59 InlinedRegion, 60 /// Region with outlined function for standalone 'target' directive. 61 TargetRegion, 62 }; 63 64 CGOpenMPRegionInfo(const CapturedStmt &CS, 65 const CGOpenMPRegionKind RegionKind, 66 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 67 bool HasCancel) 68 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 69 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 70 71 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 73 bool HasCancel) 74 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 75 Kind(Kind), HasCancel(HasCancel) {} 76 77 /// Get a variable or parameter for storing global thread id 78 /// inside OpenMP construct. 79 virtual const VarDecl *getThreadIDVariable() const = 0; 80 81 /// Emit the captured statement body. 82 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 83 84 /// Get an LValue for the current ThreadID variable. 85 /// \return LValue for thread id variable. This LValue always has type int32*. 
86 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 87 88 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 89 90 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 91 92 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 93 94 bool hasCancel() const { return HasCancel; } 95 96 static bool classof(const CGCapturedStmtInfo *Info) { 97 return Info->getKind() == CR_OpenMP; 98 } 99 100 ~CGOpenMPRegionInfo() override = default; 101 102 protected: 103 CGOpenMPRegionKind RegionKind; 104 RegionCodeGenTy CodeGen; 105 OpenMPDirectiveKind Kind; 106 bool HasCancel; 107 }; 108 109 /// API for captured statement code generation in OpenMP constructs. 110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 111 public: 112 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 113 const RegionCodeGenTy &CodeGen, 114 OpenMPDirectiveKind Kind, bool HasCancel, 115 StringRef HelperName) 116 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 117 HasCancel), 118 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 119 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 120 } 121 122 /// Get a variable or parameter for storing global thread id 123 /// inside OpenMP construct. 124 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 125 126 /// Get the name of the capture helper. 127 StringRef getHelperName() const override { return HelperName; } 128 129 static bool classof(const CGCapturedStmtInfo *Info) { 130 return CGOpenMPRegionInfo::classof(Info) && 131 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 132 ParallelOutlinedRegion; 133 } 134 135 private: 136 /// A variable or parameter storing global thread id for OpenMP 137 /// constructs. 138 const VarDecl *ThreadIDVar; 139 StringRef HelperName; 140 }; 141 142 /// API for captured statement code generation in OpenMP constructs. 
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 144 public: 145 class UntiedTaskActionTy final : public PrePostActionTy { 146 bool Untied; 147 const VarDecl *PartIDVar; 148 const RegionCodeGenTy UntiedCodeGen; 149 llvm::SwitchInst *UntiedSwitch = nullptr; 150 151 public: 152 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 153 const RegionCodeGenTy &UntiedCodeGen) 154 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 155 void Enter(CodeGenFunction &CGF) override { 156 if (Untied) { 157 // Emit task switching point. 158 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 159 CGF.GetAddrOfLocalVar(PartIDVar), 160 PartIDVar->getType()->castAs<PointerType>()); 161 llvm::Value *Res = 162 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 163 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 164 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 165 CGF.EmitBlock(DoneBB); 166 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 167 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 168 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 169 CGF.Builder.GetInsertBlock()); 170 emitUntiedSwitch(CGF); 171 } 172 } 173 void emitUntiedSwitch(CodeGenFunction &CGF) const { 174 if (Untied) { 175 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 176 CGF.GetAddrOfLocalVar(PartIDVar), 177 PartIDVar->getType()->castAs<PointerType>()); 178 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 179 PartIdLVal); 180 UntiedCodeGen(CGF); 181 CodeGenFunction::JumpDest CurPoint = 182 CGF.getJumpDestInCurrentScope(".untied.next."); 183 CGF.EmitBranch(CGF.ReturnBlock.getBlock()); 184 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 185 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 186 CGF.Builder.GetInsertBlock()); 187 CGF.EmitBranchThroughCleanup(CurPoint); 188 CGF.EmitBlock(CurPoint.getBlock()); 189 } 190 } 191 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 192 }; 193 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 194 const VarDecl *ThreadIDVar, 195 const RegionCodeGenTy &CodeGen, 196 OpenMPDirectiveKind Kind, bool HasCancel, 197 const UntiedTaskActionTy &Action) 198 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 199 ThreadIDVar(ThreadIDVar), Action(Action) { 200 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 201 } 202 203 /// Get a variable or parameter for storing global thread id 204 /// inside OpenMP construct. 205 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 206 207 /// Get an LValue for the current ThreadID variable. 208 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 209 210 /// Get the name of the capture helper. 211 StringRef getHelperName() const override { return ".omp_outlined."; } 212 213 void emitUntiedSwitch(CodeGenFunction &CGF) override { 214 Action.emitUntiedSwitch(CGF); 215 } 216 217 static bool classof(const CGCapturedStmtInfo *Info) { 218 return CGOpenMPRegionInfo::classof(Info) && 219 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 220 TaskOutlinedRegion; 221 } 222 223 private: 224 /// A variable or parameter storing global thread id for OpenMP 225 /// constructs. 226 const VarDecl *ThreadIDVar; 227 /// Action for emitting code for untied tasks. 228 const UntiedTaskActionTy &Action; 229 }; 230 231 /// API for inlined captured statement code generation in OpenMP 232 /// constructs. 233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 234 public: 235 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 236 const RegionCodeGenTy &CodeGen, 237 OpenMPDirectiveKind Kind, bool HasCancel) 238 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 239 OldCSI(OldCSI), 240 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 241 242 // Retrieve the value of the context parameter. 
243 llvm::Value *getContextValue() const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->getContextValue(); 246 llvm_unreachable("No context value for inlined OpenMP region"); 247 } 248 249 void setContextValue(llvm::Value *V) override { 250 if (OuterRegionInfo) { 251 OuterRegionInfo->setContextValue(V); 252 return; 253 } 254 llvm_unreachable("No context value for inlined OpenMP region"); 255 } 256 257 /// Lookup the captured field decl for a variable. 258 const FieldDecl *lookup(const VarDecl *VD) const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->lookup(VD); 261 // If there is no outer outlined region,no need to lookup in a list of 262 // captured variables, we can use the original one. 263 return nullptr; 264 } 265 266 FieldDecl *getThisFieldDecl() const override { 267 if (OuterRegionInfo) 268 return OuterRegionInfo->getThisFieldDecl(); 269 return nullptr; 270 } 271 272 /// Get a variable or parameter for storing global thread id 273 /// inside OpenMP construct. 274 const VarDecl *getThreadIDVariable() const override { 275 if (OuterRegionInfo) 276 return OuterRegionInfo->getThreadIDVariable(); 277 return nullptr; 278 } 279 280 /// Get an LValue for the current ThreadID variable. 281 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 282 if (OuterRegionInfo) 283 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 284 llvm_unreachable("No LValue for inlined OpenMP construct"); 285 } 286 287 /// Get the name of the capture helper. 
288 StringRef getHelperName() const override { 289 if (auto *OuterRegionInfo = getOldCSI()) 290 return OuterRegionInfo->getHelperName(); 291 llvm_unreachable("No helper name for inlined OpenMP construct"); 292 } 293 294 void emitUntiedSwitch(CodeGenFunction &CGF) override { 295 if (OuterRegionInfo) 296 OuterRegionInfo->emitUntiedSwitch(CGF); 297 } 298 299 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 300 301 static bool classof(const CGCapturedStmtInfo *Info) { 302 return CGOpenMPRegionInfo::classof(Info) && 303 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 304 } 305 306 ~CGOpenMPInlinedRegionInfo() override = default; 307 308 private: 309 /// CodeGen info about outer OpenMP region. 310 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 311 CGOpenMPRegionInfo *OuterRegionInfo; 312 }; 313 314 /// API for captured statement code generation in OpenMP target 315 /// constructs. For this captures, implicit parameters are used instead of the 316 /// captured fields. The name of the target region has to be unique in a given 317 /// application so it is provided by the client, because only the client has 318 /// the information to generate that. 319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 320 public: 321 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 322 const RegionCodeGenTy &CodeGen, StringRef HelperName) 323 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 324 /*HasCancel=*/false), 325 HelperName(HelperName) {} 326 327 /// This is unused for target regions because each starts executing 328 /// with a single thread. 329 const VarDecl *getThreadIDVariable() const override { return nullptr; } 330 331 /// Get the name of the capture helper. 
332 StringRef getHelperName() const override { return HelperName; } 333 334 static bool classof(const CGCapturedStmtInfo *Info) { 335 return CGOpenMPRegionInfo::classof(Info) && 336 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 337 } 338 339 private: 340 StringRef HelperName; 341 }; 342 343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 344 llvm_unreachable("No codegen for expressions"); 345 } 346 /// API for generation of expressions captured in a innermost OpenMP 347 /// region. 348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 349 public: 350 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 351 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 352 OMPD_unknown, 353 /*HasCancel=*/false), 354 PrivScope(CGF) { 355 // Make sure the globals captured in the provided statement are local by 356 // using the privatization logic. We assume the same variable is not 357 // captured more than once. 358 for (const auto &C : CS.captures()) { 359 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 360 continue; 361 362 const VarDecl *VD = C.getCapturedVar(); 363 if (VD->isLocalVarDeclOrParm()) 364 continue; 365 366 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 367 /*RefersToEnclosingVariableOrCapture=*/false, 368 VD->getType().getNonReferenceType(), VK_LValue, 369 C.getLocation()); 370 PrivScope.addPrivate( 371 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 372 } 373 (void)PrivScope.Privatize(); 374 } 375 376 /// Lookup the captured field decl for a variable. 377 const FieldDecl *lookup(const VarDecl *VD) const override { 378 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 379 return FD; 380 return nullptr; 381 } 382 383 /// Emit the captured statement body. 
384 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 385 llvm_unreachable("No body for expressions"); 386 } 387 388 /// Get a variable or parameter for storing global thread id 389 /// inside OpenMP construct. 390 const VarDecl *getThreadIDVariable() const override { 391 llvm_unreachable("No thread id for expressions"); 392 } 393 394 /// Get the name of the capture helper. 395 StringRef getHelperName() const override { 396 llvm_unreachable("No helper name for expressions"); 397 } 398 399 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 400 401 private: 402 /// Private scope to capture global variables. 403 CodeGenFunction::OMPPrivateScope PrivScope; 404 }; 405 406 /// RAII for emitting code of OpenMP constructs. 407 class InlinedOpenMPRegionRAII { 408 CodeGenFunction &CGF; 409 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 410 FieldDecl *LambdaThisCaptureField = nullptr; 411 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 412 413 public: 414 /// Constructs region for combined constructs. 415 /// \param CodeGen Code generation sequence for combined directives. Includes 416 /// a list of functions used for code generation of implicitly inlined 417 /// regions. 418 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 419 OpenMPDirectiveKind Kind, bool HasCancel) 420 : CGF(CGF) { 421 // Start emission for the construct. 422 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 423 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 424 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 425 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 426 CGF.LambdaThisCaptureField = nullptr; 427 BlockInfo = CGF.BlockInfo; 428 CGF.BlockInfo = nullptr; 429 } 430 431 ~InlinedOpenMPRegionRAII() { 432 // Restore original CapturedStmtInfo only if we're done with code emission. 
433 auto *OldCSI = 434 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 435 delete CGF.CapturedStmtInfo; 436 CGF.CapturedStmtInfo = OldCSI; 437 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 438 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 439 CGF.BlockInfo = BlockInfo; 440 } 441 }; 442 443 /// Values for bit flags used in the ident_t to describe the fields. 444 /// All enumeric elements are named and described in accordance with the code 445 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 446 enum OpenMPLocationFlags : unsigned { 447 /// Use trampoline for internal microtask. 448 OMP_IDENT_IMD = 0x01, 449 /// Use c-style ident structure. 450 OMP_IDENT_KMPC = 0x02, 451 /// Atomic reduction option for kmpc_reduce. 452 OMP_ATOMIC_REDUCE = 0x10, 453 /// Explicit 'barrier' directive. 454 OMP_IDENT_BARRIER_EXPL = 0x20, 455 /// Implicit barrier in code. 456 OMP_IDENT_BARRIER_IMPL = 0x40, 457 /// Implicit barrier in 'for' directive. 458 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 459 /// Implicit barrier in 'sections' directive. 460 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 461 /// Implicit barrier in 'single' directive. 462 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 463 /// Call of __kmp_for_static_init for static loop. 464 OMP_IDENT_WORK_LOOP = 0x200, 465 /// Call of __kmp_for_static_init for sections. 466 OMP_IDENT_WORK_SECTIONS = 0x400, 467 /// Call of __kmp_for_static_init for distribute. 468 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 469 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 470 }; 471 472 namespace { 473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 474 /// Values for bit flags for marking which requires clauses have been used. 475 enum OpenMPOffloadingRequiresDirFlags : int64_t { 476 /// flag undefined. 477 OMP_REQ_UNDEFINED = 0x000, 478 /// no requires clause present. 479 OMP_REQ_NONE = 0x001, 480 /// reverse_offload clause. 
481 OMP_REQ_REVERSE_OFFLOAD = 0x002, 482 /// unified_address clause. 483 OMP_REQ_UNIFIED_ADDRESS = 0x004, 484 /// unified_shared_memory clause. 485 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 486 /// dynamic_allocators clause. 487 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 488 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 489 }; 490 491 enum OpenMPOffloadingReservedDeviceIDs { 492 /// Device ID if the device was not defined, runtime should get it 493 /// from environment variables in the spec. 494 OMP_DEVICEID_UNDEF = -1, 495 }; 496 } // anonymous namespace 497 498 /// Describes ident structure that describes a source location. 499 /// All descriptions are taken from 500 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 501 /// Original structure: 502 /// typedef struct ident { 503 /// kmp_int32 reserved_1; /**< might be used in Fortran; 504 /// see above */ 505 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 506 /// KMP_IDENT_KMPC identifies this union 507 /// member */ 508 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 509 /// see above */ 510 ///#if USE_ITT_BUILD 511 /// /* but currently used for storing 512 /// region-specific ITT */ 513 /// /* contextual information. */ 514 ///#endif /* USE_ITT_BUILD */ 515 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 516 /// C++ */ 517 /// char const *psource; /**< String describing the source location. 518 /// The string is composed of semi-colon separated 519 // fields which describe the source file, 520 /// the function and a pair of line numbers that 521 /// delimit the construct. 522 /// */ 523 /// } ident_t; 524 enum IdentFieldIndex { 525 /// might be used in Fortran 526 IdentField_Reserved_1, 527 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
528 IdentField_Flags, 529 /// Not really used in Fortran any more 530 IdentField_Reserved_2, 531 /// Source[4] in Fortran, do not use for C++ 532 IdentField_Reserved_3, 533 /// String describing the source location. The string is composed of 534 /// semi-colon separated fields which describe the source file, the function 535 /// and a pair of line numbers that delimit the construct. 536 IdentField_PSource 537 }; 538 539 /// Schedule types for 'omp for' loops (these enumerators are taken from 540 /// the enum sched_type in kmp.h). 541 enum OpenMPSchedType { 542 /// Lower bound for default (unordered) versions. 543 OMP_sch_lower = 32, 544 OMP_sch_static_chunked = 33, 545 OMP_sch_static = 34, 546 OMP_sch_dynamic_chunked = 35, 547 OMP_sch_guided_chunked = 36, 548 OMP_sch_runtime = 37, 549 OMP_sch_auto = 38, 550 /// static with chunk adjustment (e.g., simd) 551 OMP_sch_static_balanced_chunked = 45, 552 /// Lower bound for 'ordered' versions. 553 OMP_ord_lower = 64, 554 OMP_ord_static_chunked = 65, 555 OMP_ord_static = 66, 556 OMP_ord_dynamic_chunked = 67, 557 OMP_ord_guided_chunked = 68, 558 OMP_ord_runtime = 69, 559 OMP_ord_auto = 70, 560 OMP_sch_default = OMP_sch_static, 561 /// dist_schedule types 562 OMP_dist_sch_static_chunked = 91, 563 OMP_dist_sch_static = 92, 564 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 565 /// Set if the monotonic schedule modifier was present. 566 OMP_sch_modifier_monotonic = (1 << 29), 567 /// Set if the nonmonotonic schedule modifier was present. 568 OMP_sch_modifier_nonmonotonic = (1 << 30), 569 }; 570 571 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 572 /// region. 
573 class CleanupTy final : public EHScopeStack::Cleanup { 574 PrePostActionTy *Action; 575 576 public: 577 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 578 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 579 if (!CGF.HaveInsertPoint()) 580 return; 581 Action->Exit(CGF); 582 } 583 }; 584 585 } // anonymous namespace 586 587 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 588 CodeGenFunction::RunCleanupsScope Scope(CGF); 589 if (PrePostAction) { 590 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 591 Callback(CodeGen, CGF, *PrePostAction); 592 } else { 593 PrePostActionTy Action; 594 Callback(CodeGen, CGF, Action); 595 } 596 } 597 598 /// Check if the combiner is a call to UDR combiner and if it is so return the 599 /// UDR decl used for reduction. 600 static const OMPDeclareReductionDecl * 601 getReductionInit(const Expr *ReductionOp) { 602 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 603 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 604 if (const auto *DRE = 605 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 606 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 607 return DRD; 608 return nullptr; 609 } 610 611 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 612 const OMPDeclareReductionDecl *DRD, 613 const Expr *InitOp, 614 Address Private, Address Original, 615 QualType Ty) { 616 if (DRD->getInitializer()) { 617 std::pair<llvm::Function *, llvm::Function *> Reduction = 618 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 619 const auto *CE = cast<CallExpr>(InitOp); 620 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 621 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 622 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 623 const auto *LHSDRE = 624 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 625 const auto *RHSDRE = 626 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 627 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 628 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 629 [=]() { return Private; }); 630 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 631 [=]() { return Original; }); 632 (void)PrivateScope.Privatize(); 633 RValue Func = RValue::get(Reduction.second); 634 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 635 CGF.EmitIgnoredExpr(InitOp); 636 } else { 637 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 638 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 639 auto *GV = new llvm::GlobalVariable( 640 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 641 llvm::GlobalValue::PrivateLinkage, Init, Name); 642 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 643 RValue InitRVal; 644 switch (CGF.getEvaluationKind(Ty)) { 645 case TEK_Scalar: 646 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 647 break; 648 case TEK_Complex: 649 InitRVal = 650 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 651 break; 652 case TEK_Aggregate: 653 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 654 break; 655 } 656 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 657 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 658 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 659 /*IsInitializer=*/false); 660 } 661 } 662 663 /// Emit initialization of arrays of complex types. 664 /// \param DestAddr Address of the array. 665 /// \param Type Type of array. 666 /// \param Init Initial expression of array. 667 /// \param SrcAddr Address of the original array. 668 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 669 QualType Type, bool EmitDeclareReductionInit, 670 const Expr *Init, 671 const OMPDeclareReductionDecl *DRD, 672 Address SrcAddr = Address::invalid()) { 673 // Perform element-by-element initialization. 
674 QualType ElementTy; 675 676 // Drill down to the base element type on both arrays. 677 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 678 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 679 DestAddr = 680 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 681 if (DRD) 682 SrcAddr = 683 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 684 685 llvm::Value *SrcBegin = nullptr; 686 if (DRD) 687 SrcBegin = SrcAddr.getPointer(); 688 llvm::Value *DestBegin = DestAddr.getPointer(); 689 // Cast from pointer to array type to pointer to single element. 690 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 691 // The basic structure here is a while-do loop. 692 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 693 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 694 llvm::Value *IsEmpty = 695 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 696 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 697 698 // Enter the loop body, making that address the current address. 
699 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 700 CGF.EmitBlock(BodyBB); 701 702 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 703 704 llvm::PHINode *SrcElementPHI = nullptr; 705 Address SrcElementCurrent = Address::invalid(); 706 if (DRD) { 707 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 708 "omp.arraycpy.srcElementPast"); 709 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 710 SrcElementCurrent = 711 Address(SrcElementPHI, 712 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 713 } 714 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 715 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 716 DestElementPHI->addIncoming(DestBegin, EntryBB); 717 Address DestElementCurrent = 718 Address(DestElementPHI, 719 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 720 721 // Emit copy. 722 { 723 CodeGenFunction::RunCleanupsScope InitScope(CGF); 724 if (EmitDeclareReductionInit) { 725 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 726 SrcElementCurrent, ElementTy); 727 } else 728 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 729 /*IsInitializer=*/false); 730 } 731 732 if (DRD) { 733 // Shift the address forward by one element. 734 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 735 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 736 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 737 } 738 739 // Shift the address forward by one element. 740 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 741 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 742 // Check whether we've reached the end. 743 llvm::Value *Done = 744 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 745 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 746 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 747 748 // Done. 
749 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 750 } 751 752 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 753 return CGF.EmitOMPSharedLValue(E); 754 } 755 756 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 757 const Expr *E) { 758 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 759 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 760 return LValue(); 761 } 762 763 void ReductionCodeGen::emitAggregateInitialization( 764 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 765 const OMPDeclareReductionDecl *DRD) { 766 // Emit VarDecl with copy init for arrays. 767 // Get the address of the original variable captured in current 768 // captured region. 769 const auto *PrivateVD = 770 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 771 bool EmitDeclareReductionInit = 772 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 773 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 774 EmitDeclareReductionInit, 775 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 776 : PrivateVD->getInit(), 777 DRD, SharedLVal.getAddress(CGF)); 778 } 779 780 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 781 ArrayRef<const Expr *> Origs, 782 ArrayRef<const Expr *> Privates, 783 ArrayRef<const Expr *> ReductionOps) { 784 ClausesData.reserve(Shareds.size()); 785 SharedAddresses.reserve(Shareds.size()); 786 Sizes.reserve(Shareds.size()); 787 BaseDecls.reserve(Shareds.size()); 788 const auto *IOrig = Origs.begin(); 789 const auto *IPriv = Privates.begin(); 790 const auto *IRed = ReductionOps.begin(); 791 for (const Expr *Ref : Shareds) { 792 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 793 std::advance(IOrig, 1); 794 std::advance(IPriv, 1); 795 std::advance(IRed, 1); 796 } 797 } 798 799 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 800 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 801 "Number of generated lvalues must be exactly N."); 802 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 803 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 804 SharedAddresses.emplace_back(First, Second); 805 if (ClausesData[N].Shared == ClausesData[N].Ref) { 806 OrigAddresses.emplace_back(First, Second); 807 } else { 808 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 809 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 810 OrigAddresses.emplace_back(First, Second); 811 } 812 } 813 814 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 815 const auto *PrivateVD = 816 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 817 QualType PrivateType = PrivateVD->getType(); 818 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 819 if (!PrivateType->isVariablyModifiedType()) { 820 Sizes.emplace_back( 821 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 822 nullptr); 823 return; 824 } 825 llvm::Value *Size; 826 llvm::Value 
*SizeInChars; 827 auto *ElemType = 828 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 829 ->getElementType(); 830 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 831 if (AsArraySection) { 832 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 833 OrigAddresses[N].first.getPointer(CGF)); 834 Size = CGF.Builder.CreateNUWAdd( 835 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 836 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 837 } else { 838 SizeInChars = 839 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 840 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 841 } 842 Sizes.emplace_back(SizeInChars, Size); 843 CodeGenFunction::OpaqueValueMapping OpaqueMap( 844 CGF, 845 cast<OpaqueValueExpr>( 846 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 847 RValue::get(Size)); 848 CGF.EmitVariablyModifiedType(PrivateType); 849 } 850 851 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 852 llvm::Value *Size) { 853 const auto *PrivateVD = 854 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 855 QualType PrivateType = PrivateVD->getType(); 856 if (!PrivateType->isVariablyModifiedType()) { 857 assert(!Size && !Sizes[N].second && 858 "Size should be nullptr for non-variably modified reduction " 859 "items."); 860 return; 861 } 862 CodeGenFunction::OpaqueValueMapping OpaqueMap( 863 CGF, 864 cast<OpaqueValueExpr>( 865 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 866 RValue::get(Size)); 867 CGF.EmitVariablyModifiedType(PrivateType); 868 } 869 870 void ReductionCodeGen::emitInitialization( 871 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 872 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 873 assert(SharedAddresses.size() > N && "No variable was generated"); 874 const auto *PrivateVD = 875 
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 876 const OMPDeclareReductionDecl *DRD = 877 getReductionInit(ClausesData[N].ReductionOp); 878 QualType PrivateType = PrivateVD->getType(); 879 PrivateAddr = CGF.Builder.CreateElementBitCast( 880 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 881 QualType SharedType = SharedAddresses[N].first.getType(); 882 SharedLVal = CGF.MakeAddrLValue( 883 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 884 CGF.ConvertTypeForMem(SharedType)), 885 SharedType, SharedAddresses[N].first.getBaseInfo(), 886 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 887 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 888 if (DRD && DRD->getInitializer()) 889 (void)DefaultInit(CGF); 890 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 891 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 892 (void)DefaultInit(CGF); 893 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 894 PrivateAddr, SharedLVal.getAddress(CGF), 895 SharedLVal.getType()); 896 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 897 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 898 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 899 PrivateVD->getType().getQualifiers(), 900 /*IsInitializer=*/false); 901 } 902 } 903 904 bool ReductionCodeGen::needCleanups(unsigned N) { 905 const auto *PrivateVD = 906 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 907 QualType PrivateType = PrivateVD->getType(); 908 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 909 return DTorKind != QualType::DK_none; 910 } 911 912 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 913 Address PrivateAddr) { 914 const auto *PrivateVD = 915 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 916 QualType PrivateType = PrivateVD->getType(); 917 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType();
  if (needCleanups(N)) {
    // Destroy the private copy through an address typed as the private type.
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Follow pointer/reference levels of \p BaseTy, loading through \p BaseLV at
/// each level, until the type is neither a pointer nor a reference or it
/// equals \p ElTy.
///
/// \return An lvalue at the reached address, with the address element type
/// cast to the memory type of \p ElTy while keeping the reached lvalue's
/// type and base info.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Wrap \p Addr so it can stand in for a base of type \p BaseTy.
///
/// One memory temporary is created per pointer/reference level walked from
/// \p BaseTy towards \p ElTy; each temporary is stored into the one created
/// before it, and \p Addr (cast to the innermost temporary's element type) is
/// stored into the last one. The outermost temporary is returned. When no
/// level is walked, \p Addr is cast to \p BaseLVType and returned with
/// \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Chain the new temporary under the previous one; remember the first.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return
Address(Addr, BaseLVAlignment); 971 } 972 973 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 974 const VarDecl *OrigVD = nullptr; 975 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 976 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 977 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 978 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 979 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 980 Base = TempASE->getBase()->IgnoreParenImpCasts(); 981 DE = cast<DeclRefExpr>(Base); 982 OrigVD = cast<VarDecl>(DE->getDecl()); 983 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 984 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 985 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 986 Base = TempASE->getBase()->IgnoreParenImpCasts(); 987 DE = cast<DeclRefExpr>(Base); 988 OrigVD = cast<VarDecl>(DE->getDecl()); 989 } 990 return OrigVD; 991 } 992 993 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 994 Address PrivateAddr) { 995 const DeclRefExpr *DE; 996 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 997 BaseDecls.emplace_back(OrigVD); 998 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 999 LValue BaseLValue = 1000 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1001 OriginalBaseLValue); 1002 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1003 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1004 llvm::Value *PrivatePointer = 1005 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1006 PrivateAddr.getPointer(), 1007 SharedAddresses[N].first.getAddress(CGF).getType()); 1008 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1009 return castToBase(CGF, OrigVD->getType(), 1010 SharedAddresses[N].first.getType(), 1011 OriginalBaseLValue.getAddress(CGF).getType(), 1012 OriginalBaseLValue.getAlignment(), Ptr); 1013 } 1014 
BaseDecls.emplace_back( 1015 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1016 return PrivateAddr; 1017 } 1018 1019 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1020 const OMPDeclareReductionDecl *DRD = 1021 getReductionInit(ClausesData[N].ReductionOp); 1022 return DRD && DRD->getInitializer(); 1023 } 1024 1025 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1026 return CGF.EmitLoadOfPointerLValue( 1027 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1028 getThreadIDVariable()->getType()->castAs<PointerType>()); 1029 } 1030 1031 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1032 if (!CGF.HaveInsertPoint()) 1033 return; 1034 // 1.2.2 OpenMP Language Terminology 1035 // Structured block - An executable statement with a single entry at the 1036 // top and a single exit at the bottom. 1037 // The point of exit cannot be a branch out of the structured block. 1038 // longjmp() and throw() must not violate the entry/exit criteria. 
CGF.EHStack.pushTerminate();
  // The region body is generated between the push and pop of a terminate
  // scope on the EH stack.
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Build the thread id lvalue for a task region directly from the thread id
/// variable's own address and type (no load through a pointer).
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create an unnamed public field of type \p FieldTy and add it to \p DC.
///
/// The field has no bit-width, is not mutable, has no in-class initializer
/// and uses default (invalid) source locations.
///
/// \return The newly created field.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// Construct the runtime helper for module \p CGM.
///
/// \param FirstSeparator Separator emitted before the first part of a name
///                       built by getName().
/// \param Separator      Separator emitted before every following part.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // The critical-name type is an array of eight 32-bit integers.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
1076 for (const auto &Data : EmittedNonTargetVariables) { 1077 if (!Data.getValue().pointsToAliveValue()) 1078 continue; 1079 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1080 if (!GV) 1081 continue; 1082 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1083 continue; 1084 GV->eraseFromParent(); 1085 } 1086 } 1087 1088 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1089 SmallString<128> Buffer; 1090 llvm::raw_svector_ostream OS(Buffer); 1091 StringRef Sep = FirstSeparator; 1092 for (StringRef Part : Parts) { 1093 OS << Sep << Part; 1094 Sep = Separator; 1095 } 1096 return std::string(OS.str()); 1097 } 1098 1099 static llvm::Function * 1100 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1101 const Expr *CombinerInitializer, const VarDecl *In, 1102 const VarDecl *Out, bool IsCombiner) { 1103 // void .omp_combiner.(Ty *in, Ty *out); 1104 ASTContext &C = CGM.getContext(); 1105 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1106 FunctionArgList Args; 1107 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1108 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1109 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1110 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1111 Args.push_back(&OmpOutParm); 1112 Args.push_back(&OmpInParm); 1113 const CGFunctionInfo &FnInfo = 1114 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1115 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1116 std::string Name = CGM.getOpenMPRuntime().getName( 1117 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1118 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1119 Name, &CGM.getModule()); 1120 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1121 if (CGM.getLangOpts().Optimize) { 1122 Fn->removeFnAttr(llvm::Attribute::NoInline); 1123 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1124 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1125 } 1126 CodeGenFunction CGF(CGM); 1127 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1128 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1129 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1130 Out->getLocation()); 1131 CodeGenFunction::OMPPrivateScope Scope(CGF); 1132 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1133 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1134 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1135 .getAddress(CGF); 1136 }); 1137 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1138 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1139 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1140 .getAddress(CGF); 1141 }); 1142 (void)Scope.Privatize(); 1143 if (!IsCombiner && Out->hasInit() && 1144 !CGF.isTrivialInitializer(Out->getInit())) { 1145 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1146 Out->getType().getQualifiers(), 1147 /*IsInitializer=*/true); 1148 } 1149 if (CombinerInitializer) 1150 CGF.EmitIgnoredExpr(CombinerInitializer); 1151 Scope.ForceCleanup(); 1152 CGF.FinishFunction(); 1153 return Fn; 1154 } 1155 1156 void CGOpenMPRuntime::emitUserDefinedReduction( 1157 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1158 if (UDRMap.count(D) > 0) 1159 return; 1160 llvm::Function *Combiner = emitCombinerOrInitializer( 1161 CGM, D->getType(), D->getCombiner(), 1162 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1163 
cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // The initializer expression itself is only passed on for the CallInit
    // kind; for other kinds the helper receives no expression.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  // When emitted from within a function, remember the declaration under that
  // function so functionFinished() can drop it again.
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the (combiner, initializer) helper pair for declared reduction
/// \p D, emitting the helpers first if they are not cached yet.
///
/// The initializer entry is null when \p D has no initializer (see
/// emitUserDefinedReduction).
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
1211 // The FiniCB will still be needed but at the point where the 1212 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1213 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1214 assert(IP.getBlock()->end() == IP.getPoint() && 1215 "Clang CG should cause non-terminated block!"); 1216 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1217 CGF.Builder.restoreIP(IP); 1218 CodeGenFunction::JumpDest Dest = 1219 CGF.getOMPCancelDestination(OMPD_parallel); 1220 CGF.EmitBranchThroughCleanup(Dest); 1221 }; 1222 1223 // TODO: Remove this once we emit parallel regions through the 1224 // OpenMPIRBuilder as it can do this setup internally. 1225 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1226 {FiniCB, OMPD_parallel, HasCancel}); 1227 OMPBuilder->pushFinalizationCB(std::move(FI)); 1228 } 1229 ~PushAndPopStackRAII() { 1230 if (OMPBuilder) 1231 OMPBuilder->popFinalizationCB(); 1232 } 1233 llvm::OpenMPIRBuilder *OMPBuilder; 1234 }; 1235 } // namespace 1236 1237 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1238 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1239 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1240 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1241 assert(ThreadIDVar->getType()->isPointerType() && 1242 "thread id variable must be of type kmp_int32 *"); 1243 CodeGenFunction CGF(CGM, true); 1244 bool HasCancel = false; 1245 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1246 HasCancel = OPD->hasCancel(); 1247 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1248 HasCancel = OPD->hasCancel(); 1249 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1250 HasCancel = OPSD->hasCancel(); 1251 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1252 HasCancel = OPFD->hasCancel(); 1253 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1254 HasCancel = 
OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  // PSR pushes a finalization callback now and pops it when this function
  // returns.
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

/// Emit the outlined function for the 'parallel' captured statement of \p D.
///
/// \param ThreadIDVar   Thread id variable of the region (pointer-typed, as
///                      asserted by the shared helper).
/// \param InnermostKind Directive kind recorded in the region info.
/// \param CodeGen       Generator for the region body.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Emit the outlined function for the 'teams' captured statement of \p D.
/// Parameters have the same meaning as for emitParallelOutlinedFunction.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind
InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Action body handed to UntiedTaskActionTy: emits a call to
  // __kmpc_omp_task(loc, thread id, task pointer loaded from TaskTVar).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  // Taskloop-based directives capture under OMPD_taskloop, all others under
  // OMPD_task.
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // NumberOfParts is only written for untied tasks.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
const RecordDecl *RD, const CGRecordLayout &RL, 1339 ArrayRef<llvm::Constant *> Data) { 1340 llvm::StructType *StructTy = RL.getLLVMType(); 1341 unsigned PrevIdx = 0; 1342 ConstantInitBuilder CIBuilder(CGM); 1343 auto DI = Data.begin(); 1344 for (const FieldDecl *FD : RD->fields()) { 1345 unsigned Idx = RL.getLLVMFieldNo(FD); 1346 // Fill the alignment. 1347 for (unsigned I = PrevIdx; I < Idx; ++I) 1348 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1349 PrevIdx = Idx + 1; 1350 Fields.add(*DI); 1351 ++DI; 1352 } 1353 } 1354 1355 template <class... As> 1356 static llvm::GlobalVariable * 1357 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1358 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1359 As &&... Args) { 1360 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1361 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1362 ConstantInitBuilder CIBuilder(CGM); 1363 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1364 buildStructValue(Fields, CGM, RD, RL, Data); 1365 return Fields.finishAndCreateGlobal( 1366 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1367 std::forward<As>(Args)...); 1368 } 1369 1370 template <typename T> 1371 static void 1372 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1373 ArrayRef<llvm::Constant *> Data, 1374 T &Parent) { 1375 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1376 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1377 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1378 buildStructValue(Fields, CGM, RD, RL, Data); 1379 Fields.finishAndAddTo(Parent); 1380 } 1381 1382 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1383 bool AtCurrentPoint) { 1384 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1385 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1386 1387 llvm::Value *Undef = 
llvm::UndefValue::get(CGF.Int32Ty);
  // The service insert point is a placeholder bitcast of undef named "svcpt".
  // It is placed either at the end of the current insert block or right after
  // the function's alloca insert point.
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Remove the service insert point placeholder of the current function, if
/// one is set.
///
/// Note that the map entry for the function is created when it does not exist
/// yet (FindAndConstruct).
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    // Reset the cached pointer before erasing the instruction.
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Format the ident string ";file;function;line;column;;" for \p Loc.
///
/// The function component is the qualified name of the current function
/// declaration and is left empty when there is none.
///
/// \param Buffer Storage for the string; the returned StringRef refers to it,
///               so it must outlive the result.
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  llvm::Constant *SrcLocStr;
  // Without debug info, or for an invalid location, the builder's default
  // source location string is used.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
  } else {
    std::string FunctionName = "";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr =
OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName,
                                                Line, Column);
  }
  // The ident combines the location string, the caller's flags and the
  // runtime's default "reserved 2" flags.
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
                                     Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Load the thread id from the region's variable when any of these holds:
      // no landing pad is required, (C++) exceptions are disabled, code is
      // currently emitted into the entry block, the variable's pointer is not
      // an instruction, or that instruction lives in the entry block or in
      // the current insert block.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
1497 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1498 if (!Elem.second.ServiceInsertPt) 1499 setLocThreadIdInsertPt(CGF); 1500 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1501 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1502 llvm::CallInst *Call = CGF.Builder.CreateCall( 1503 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 1504 OMPRTL___kmpc_global_thread_num), 1505 emitUpdateLocation(CGF, Loc)); 1506 Call->setCallingConv(CGF.getRuntimeCC()); 1507 Elem.second.ThreadID = Call; 1508 return Call; 1509 } 1510 1511 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1512 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1513 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1514 clearLocThreadIdInsertPt(CGF); 1515 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1516 } 1517 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1518 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1519 UDRMap.erase(D); 1520 FunctionUDRMap.erase(CGF.CurFn); 1521 } 1522 auto I = FunctionUDMMap.find(CGF.CurFn); 1523 if (I != FunctionUDMMap.end()) { 1524 for(const auto *D : I->second) 1525 UDMMap.erase(D); 1526 FunctionUDMMap.erase(I); 1527 } 1528 LastprivateConditionalToTypes.erase(CGF.CurFn); 1529 FunctionToUntiedTaskStackMap.erase(CGF.CurFn); 1530 } 1531 1532 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1533 return OMPBuilder.IdentPtr; 1534 } 1535 1536 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1537 if (!Kmpc_MicroTy) { 1538 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1539 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1540 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1541 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1542 } 1543 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1544 } 1545 1546 llvm::FunctionCallee 1547 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 1548 assert((IVSize == 32 || IVSize == 64) && 1549 "IV size is not compatible with the omp runtime"); 1550 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 1551 : "__kmpc_for_static_init_4u") 1552 : (IVSigned ? "__kmpc_for_static_init_8" 1553 : "__kmpc_for_static_init_8u"); 1554 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 1555 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 1556 llvm::Type *TypeParams[] = { 1557 getIdentTyPointerTy(), // loc 1558 CGM.Int32Ty, // tid 1559 CGM.Int32Ty, // schedtype 1560 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 1561 PtrTy, // p_lower 1562 PtrTy, // p_upper 1563 PtrTy, // p_stride 1564 ITy, // incr 1565 ITy // chunk 1566 }; 1567 auto *FnTy = 1568 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1569 return CGM.CreateRuntimeFunction(FnTy, Name); 1570 } 1571 1572 llvm::FunctionCallee 1573 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 1574 assert((IVSize == 32 || IVSize == 64) && 1575 "IV size is not compatible with the omp runtime"); 1576 StringRef Name = 1577 IVSize == 32 1578 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 1579 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 1580 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 1581 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 1582 CGM.Int32Ty, // tid 1583 CGM.Int32Ty, // schedtype 1584 ITy, // lower 1585 ITy, // upper 1586 ITy, // stride 1587 ITy // chunk 1588 }; 1589 auto *FnTy = 1590 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1591 return CGM.CreateRuntimeFunction(FnTy, Name); 1592 } 1593 1594 llvm::FunctionCallee 1595 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 1596 assert((IVSize == 32 || IVSize == 64) && 1597 "IV size is not compatible with the omp runtime"); 1598 StringRef Name = 1599 IVSize == 32 1600 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 1601 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 1602 llvm::Type *TypeParams[] = { 1603 getIdentTyPointerTy(), // loc 1604 CGM.Int32Ty, // tid 1605 }; 1606 auto *FnTy = 1607 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1608 return CGM.CreateRuntimeFunction(FnTy, Name); 1609 } 1610 1611 llvm::FunctionCallee 1612 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 1613 assert((IVSize == 32 || IVSize == 64) && 1614 "IV size is not compatible with the omp runtime"); 1615 StringRef Name = 1616 IVSize == 32 1617 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 1618 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 1619 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  // Unlike the init/fini entries this one returns a 32-bit integer.
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
///
/// \param Loc      Location of the entry; must be valid.
/// \param DeviceID Receives the device part of the file's unique ID.
/// \param FileID   Receives the file part of the file's unique ID.
/// \param LineNum  Receives the presumed line number of \p Loc.
///
/// If the unique ID of the file cannot be obtained an error diagnostic is
/// reported; the outputs are then taken from the default-constructed ID.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  // NOTE(review): the ID components are stored into 'unsigned' outputs; if
  // they are wider than unsigned they are truncated here - confirm intended.
  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  // A reference pointer is used for 'link' variables, and for 'to' variables
  // when unified shared memory is required.
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
llvm::raw_svector_ostream OS(PtrName); 1671 OS << CGM.getMangledName(GlobalDecl(VD)); 1672 if (!VD->isExternallyVisible()) { 1673 unsigned DeviceID, FileID, Line; 1674 getTargetEntryUniqueInfo(CGM.getContext(), 1675 VD->getCanonicalDecl()->getBeginLoc(), 1676 DeviceID, FileID, Line); 1677 OS << llvm::format("_%x", FileID); 1678 } 1679 OS << "_decl_tgt_ref_ptr"; 1680 } 1681 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 1682 if (!Ptr) { 1683 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 1684 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 1685 PtrName); 1686 1687 auto *GV = cast<llvm::GlobalVariable>(Ptr); 1688 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 1689 1690 if (!CGM.getLangOpts().OpenMPIsDevice) 1691 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 1692 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 1693 } 1694 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 1695 } 1696 return Address::invalid(); 1697 } 1698 1699 llvm::Constant * 1700 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 1701 assert(!CGM.getLangOpts().OpenMPUseTLS || 1702 !CGM.getContext().getTargetInfo().isTLSSupported()); 1703 // Lookup the entry, lazily creating it if necessary. 
1704 std::string Suffix = getName({"cache", ""}); 1705 return getOrCreateInternalVariable( 1706 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 1707 } 1708 1709 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 1710 const VarDecl *VD, 1711 Address VDAddr, 1712 SourceLocation Loc) { 1713 if (CGM.getLangOpts().OpenMPUseTLS && 1714 CGM.getContext().getTargetInfo().isTLSSupported()) 1715 return VDAddr; 1716 1717 llvm::Type *VarTy = VDAddr.getElementType(); 1718 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 1719 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 1720 CGM.Int8PtrTy), 1721 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 1722 getOrCreateThreadPrivateCache(VD)}; 1723 return Address(CGF.EmitRuntimeCall( 1724 OMPBuilder.getOrCreateRuntimeFunction( 1725 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 1726 Args), 1727 VDAddr.getAlignment()); 1728 } 1729 1730 void CGOpenMPRuntime::emitThreadPrivateVarInit( 1731 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 1732 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 1733 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 1734 // library. 1735 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 1736 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 1737 CGM.getModule(), OMPRTL___kmpc_global_thread_num), 1738 OMPLoc); 1739 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 1740 // to register constructor/destructor for variable. 
1741 llvm::Value *Args[] = { 1742 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 1743 Ctor, CopyCtor, Dtor}; 1744 CGF.EmitRuntimeCall( 1745 OMPBuilder.getOrCreateRuntimeFunction( 1746 CGM.getModule(), OMPRTL___kmpc_threadprivate_register), 1747 Args); 1748 } 1749 1750 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 1751 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 1752 bool PerformInit, CodeGenFunction *CGF) { 1753 if (CGM.getLangOpts().OpenMPUseTLS && 1754 CGM.getContext().getTargetInfo().isTLSSupported()) 1755 return nullptr; 1756 1757 VD = VD->getDefinition(CGM.getContext()); 1758 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 1759 QualType ASTTy = VD->getType(); 1760 1761 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 1762 const Expr *Init = VD->getAnyInitializer(); 1763 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1764 // Generate function that re-emits the declaration's initializer into the 1765 // threadprivate copy of the variable VD 1766 CodeGenFunction CtorCGF(CGM); 1767 FunctionArgList Args; 1768 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1769 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1770 ImplicitParamDecl::Other); 1771 Args.push_back(&Dst); 1772 1773 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1774 CGM.getContext().VoidPtrTy, Args); 1775 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1776 std::string Name = getName({"__kmpc_global_ctor_", ""}); 1777 llvm::Function *Fn = 1778 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1779 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 1780 Args, Loc, Loc); 1781 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 1782 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1783 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1784 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 1785 Arg = 
CtorCGF.Builder.CreateElementBitCast( 1786 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 1787 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 1788 /*IsInitializer=*/true); 1789 ArgVal = CtorCGF.EmitLoadOfScalar( 1790 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 1791 CGM.getContext().VoidPtrTy, Dst.getLocation()); 1792 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 1793 CtorCGF.FinishFunction(); 1794 Ctor = Fn; 1795 } 1796 if (VD->getType().isDestructedType() != QualType::DK_none) { 1797 // Generate function that emits destructor call for the threadprivate copy 1798 // of the variable VD 1799 CodeGenFunction DtorCGF(CGM); 1800 FunctionArgList Args; 1801 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 1802 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 1803 ImplicitParamDecl::Other); 1804 Args.push_back(&Dst); 1805 1806 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 1807 CGM.getContext().VoidTy, Args); 1808 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1809 std::string Name = getName({"__kmpc_global_dtor_", ""}); 1810 llvm::Function *Fn = 1811 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); 1812 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1813 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 1814 Loc, Loc); 1815 // Create a scope with an artificial location for the body of this function. 1816 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1817 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 1818 DtorCGF.GetAddrOfLocalVar(&Dst), 1819 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 1820 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 1821 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1822 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1823 DtorCGF.FinishFunction(); 1824 Dtor = Fn; 1825 } 1826 // Do not emit init function if it is not required. 
1827 if (!Ctor && !Dtor) 1828 return nullptr; 1829 1830 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1831 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 1832 /*isVarArg=*/false) 1833 ->getPointerTo(); 1834 // Copying constructor for the threadprivate variable. 1835 // Must be NULL - reserved by runtime, but currently it requires that this 1836 // parameter is always NULL. Otherwise it fires assertion. 1837 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 1838 if (Ctor == nullptr) { 1839 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1840 /*isVarArg=*/false) 1841 ->getPointerTo(); 1842 Ctor = llvm::Constant::getNullValue(CtorTy); 1843 } 1844 if (Dtor == nullptr) { 1845 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 1846 /*isVarArg=*/false) 1847 ->getPointerTo(); 1848 Dtor = llvm::Constant::getNullValue(DtorTy); 1849 } 1850 if (!CGF) { 1851 auto *InitFunctionTy = 1852 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 1853 std::string Name = getName({"__omp_threadprivate_init_", ""}); 1854 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( 1855 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 1856 CodeGenFunction InitCGF(CGM); 1857 FunctionArgList ArgList; 1858 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 1859 CGM.getTypes().arrangeNullaryFunction(), ArgList, 1860 Loc, Loc); 1861 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1862 InitCGF.FinishFunction(); 1863 return InitFunction; 1864 } 1865 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 1866 } 1867 return nullptr; 1868 } 1869 1870 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 1871 llvm::GlobalVariable *Addr, 1872 bool PerformInit) { 1873 if (CGM.getLangOpts().OMPTargetTriples.empty() && 1874 !CGM.getLangOpts().OpenMPIsDevice) 1875 return false; 1876 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 1877 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 1878 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 1879 (*Res == OMPDeclareTargetDeclAttr::MT_To && 1880 HasRequiresUnifiedSharedMemory)) 1881 return CGM.getLangOpts().OpenMPIsDevice; 1882 VD = VD->getDefinition(CGM.getContext()); 1883 assert(VD && "Unknown VarDecl"); 1884 1885 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 1886 return CGM.getLangOpts().OpenMPIsDevice; 1887 1888 QualType ASTTy = VD->getType(); 1889 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 1890 1891 // Produce the unique prefix to identify the new target regions. We use 1892 // the source location of the variable declaration which we know to not 1893 // conflict with any target region. 1894 unsigned DeviceID; 1895 unsigned FileID; 1896 unsigned Line; 1897 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 1898 SmallString<128> Buffer, Out; 1899 { 1900 llvm::raw_svector_ostream OS(Buffer); 1901 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 1902 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 1903 } 1904 1905 const Expr *Init = VD->getAnyInitializer(); 1906 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 1907 llvm::Constant *Ctor; 1908 llvm::Constant *ID; 1909 if (CGM.getLangOpts().OpenMPIsDevice) { 1910 // Generate function that re-emits the declaration's initializer into 1911 // the threadprivate copy of the variable VD 1912 CodeGenFunction CtorCGF(CGM); 1913 1914 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1915 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1916 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1917 FTy, Twine(Buffer, "_ctor"), FI, Loc); 1918 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 1919 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1920 FunctionArgList(), Loc, Loc); 1921 auto AL 
= ApplyDebugLocation::CreateArtificial(CtorCGF); 1922 CtorCGF.EmitAnyExprToMem(Init, 1923 Address(Addr, CGM.getContext().getDeclAlign(VD)), 1924 Init->getType().getQualifiers(), 1925 /*IsInitializer=*/true); 1926 CtorCGF.FinishFunction(); 1927 Ctor = Fn; 1928 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1929 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 1930 } else { 1931 Ctor = new llvm::GlobalVariable( 1932 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1933 llvm::GlobalValue::PrivateLinkage, 1934 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 1935 ID = Ctor; 1936 } 1937 1938 // Register the information for the entry associated with the constructor. 1939 Out.clear(); 1940 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1941 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 1942 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 1943 } 1944 if (VD->getType().isDestructedType() != QualType::DK_none) { 1945 llvm::Constant *Dtor; 1946 llvm::Constant *ID; 1947 if (CGM.getLangOpts().OpenMPIsDevice) { 1948 // Generate function that emits destructor call for the threadprivate 1949 // copy of the variable VD 1950 CodeGenFunction DtorCGF(CGM); 1951 1952 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 1953 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 1954 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( 1955 FTy, Twine(Buffer, "_dtor"), FI, Loc); 1956 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 1957 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 1958 FunctionArgList(), Loc, Loc); 1959 // Create a scope with an artificial location for the body of this 1960 // function. 
1961 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 1962 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 1963 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 1964 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 1965 DtorCGF.FinishFunction(); 1966 Dtor = Fn; 1967 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 1968 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 1969 } else { 1970 Dtor = new llvm::GlobalVariable( 1971 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 1972 llvm::GlobalValue::PrivateLinkage, 1973 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 1974 ID = Dtor; 1975 } 1976 // Register the information for the entry associated with the destructor. 1977 Out.clear(); 1978 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 1979 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 1980 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 1981 } 1982 return CGM.getLangOpts().OpenMPIsDevice; 1983 } 1984 1985 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 1986 QualType VarType, 1987 StringRef Name) { 1988 std::string Suffix = getName({"artificial", ""}); 1989 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 1990 llvm::Value *GAddr = 1991 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 1992 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 1993 CGM.getTarget().isTLSSupported()) { 1994 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 1995 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 1996 } 1997 std::string CacheSuffix = getName({"cache", ""}); 1998 llvm::Value *Args[] = { 1999 emitUpdateLocation(CGF, SourceLocation()), 2000 getThreadID(CGF, SourceLocation()), 2001 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2002 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2003 /*isSigned=*/false), 2004 
getOrCreateInternalVariable( 2005 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2006 return Address( 2007 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2008 CGF.EmitRuntimeCall( 2009 OMPBuilder.getOrCreateRuntimeFunction( 2010 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), 2011 Args), 2012 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2013 CGM.getContext().getTypeAlignInChars(VarType)); 2014 } 2015 2016 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 2017 const RegionCodeGenTy &ThenGen, 2018 const RegionCodeGenTy &ElseGen) { 2019 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2020 2021 // If the condition constant folds and can be elided, try to avoid emitting 2022 // the condition and the dead arm of the if/else. 2023 bool CondConstant; 2024 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2025 if (CondConstant) 2026 ThenGen(CGF); 2027 else 2028 ElseGen(CGF); 2029 return; 2030 } 2031 2032 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2033 // emit the conditional branch. 2034 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2035 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2036 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2037 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2038 2039 // Emit the 'then' code. 2040 CGF.EmitBlock(ThenBlock); 2041 ThenGen(CGF); 2042 CGF.EmitBranch(ContBlock); 2043 // Emit the 'else' code if present. 2044 // There is no need to emit line number for unconditional branch. 2045 (void)ApplyDebugLocation::CreateEmpty(CGF); 2046 CGF.EmitBlock(ElseBlock); 2047 ElseGen(CGF); 2048 // There is no need to emit line number for unconditional branch. 2049 (void)ApplyDebugLocation::CreateEmpty(CGF); 2050 CGF.EmitBranch(ContBlock); 2051 // Emit the continuation block for code after the if. 
2052 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2053 } 2054 2055 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2056 llvm::Function *OutlinedFn, 2057 ArrayRef<llvm::Value *> CapturedVars, 2058 const Expr *IfCond) { 2059 if (!CGF.HaveInsertPoint()) 2060 return; 2061 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2062 auto &M = CGM.getModule(); 2063 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, 2064 this](CodeGenFunction &CGF, PrePostActionTy &) { 2065 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2066 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2067 llvm::Value *Args[] = { 2068 RTLoc, 2069 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2070 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2071 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2072 RealArgs.append(std::begin(Args), std::end(Args)); 2073 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2074 2075 llvm::FunctionCallee RTLFn = 2076 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); 2077 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2078 }; 2079 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, 2080 this](CodeGenFunction &CGF, PrePostActionTy &) { 2081 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2082 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2083 // Build calls: 2084 // __kmpc_serialized_parallel(&Loc, GTid); 2085 llvm::Value *Args[] = {RTLoc, ThreadID}; 2086 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2087 M, OMPRTL___kmpc_serialized_parallel), 2088 Args); 2089 2090 // OutlinedFn(>id, &zero_bound, CapturedStruct); 2091 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 2092 Address ZeroAddrBound = 2093 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2094 /*Name=*/".bound.zero.addr"); 2095 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 2096 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2097 // ThreadId 
for serialized parallels is 0. 2098 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 2099 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 2100 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 2101 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 2102 2103 // __kmpc_end_serialized_parallel(&Loc, GTid); 2104 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 2105 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2106 M, OMPRTL___kmpc_end_serialized_parallel), 2107 EndArgs); 2108 }; 2109 if (IfCond) { 2110 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 2111 } else { 2112 RegionCodeGenTy ThenRCG(ThenGen); 2113 ThenRCG(CGF); 2114 } 2115 } 2116 2117 // If we're inside an (outlined) parallel region, use the region info's 2118 // thread-ID variable (it is passed in a first argument of the outlined function 2119 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 2120 // regular serial code region, get thread ID by calling kmp_int32 2121 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 2122 // return the address of that temp. 
2123 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 2124 SourceLocation Loc) { 2125 if (auto *OMPRegionInfo = 2126 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 2127 if (OMPRegionInfo->getThreadIDVariable()) 2128 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 2129 2130 llvm::Value *ThreadID = getThreadID(CGF, Loc); 2131 QualType Int32Ty = 2132 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 2133 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 2134 CGF.EmitStoreOfScalar(ThreadID, 2135 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 2136 2137 return ThreadIDTemp; 2138 } 2139 2140 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 2141 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 2142 SmallString<256> Buffer; 2143 llvm::raw_svector_ostream Out(Buffer); 2144 Out << Name; 2145 StringRef RuntimeName = Out.str(); 2146 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 2147 if (Elem.second) { 2148 assert(Elem.second->getType()->getPointerElementType() == Ty && 2149 "OMP internal variable has different type than requested"); 2150 return &*Elem.second; 2151 } 2152 2153 return Elem.second = new llvm::GlobalVariable( 2154 CGM.getModule(), Ty, /*IsConstant*/ false, 2155 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 2156 Elem.first(), /*InsertBefore=*/nullptr, 2157 llvm::GlobalValue::NotThreadLocal, AddressSpace); 2158 } 2159 2160 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 2161 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 2162 std::string Name = getName({Prefix, "var"}); 2163 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 2164 } 2165 2166 namespace { 2167 /// Common pre(post)-action for different OpenMP constructs. 
/// Enter() emits a call to the "enter" runtime function and Exit() a call to
/// the "exit" runtime function. In conditional mode the region body is
/// additionally guarded by the (non-null) result of the enter call; the caller
/// must then invoke Done() after the region to close the guard.
///
/// The argument lists are held as ArrayRef, so the arrays passed to the
/// constructor must stay alive for as long as the action is used.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  // True if the region body only runs when the enter call returns non-null.
  bool Conditional;
  // Continuation block of the conditional guard; set by Enter() in
  // conditional mode, null otherwise.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emit the enter call and, in conditional mode, branch to omp_if.then when
  /// its result is non-null and to omp_if.end otherwise.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Close the conditional guard opened by Enter().
  /// NOTE(review): ContBlock is only assigned in conditional mode; within this
  /// chunk Done() is only called on actions built with Conditional=true -
  /// confirm no other caller uses it on an unconditional action.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emit the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region named \p CriticalName around \p CriticalOpGen.
///
/// The region is bracketed by __kmpc_critical (or __kmpc_critical_with_hint
/// when \p Hint is non-null) and __kmpc_end_critical, all using the lock
/// returned by getCriticalRegionLock(). Does nothing if \p CGF has no
/// insertion point.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes the same arguments as the exit call plus, if present,
  // the hint converted to a 32-bit integer.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: \p MasterOpGen is executed only when
/// __kmpc_master returns non-zero, and is followed by __kmpc_end_master.
/// Does nothing if \p CGF has no insertion point.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// Emit a 'taskyield': either through the OpenMPIRBuilder (when that language
/// option is enabled) or as a direct __kmpc_omp_taskyield(loc, gtid, 0) call.
/// Afterwards, if inside an OpenMP region, the region's untied-task switch is
/// emitted. Does nothing if \p CGF has no insertion point.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region: \p TaskgroupOpGen bracketed by __kmpc_taskgroup
/// and __kmpc_end_taskgroup. Does nothing if \p CGF has no insertion point.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// \param Array Address of the pointer array.
/// \param Index Element of \p Array to load.
/// \param Var Declaration supplying the alignment and element type of the
/// returned address.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Generate the internal "copy_func(void *LHSArg, void *RHSArg)" helper that
/// is handed to __kmpc_copyprivate.
///
/// Both arguments are cast to \p ArgsType and treated as arrays of variable
/// pointers; for every index I the helper copies element I of the RHS array
/// into element I of the LHS array using AssignmentOps[I].
///
/// \param CopyprivateVars Variables of the copyprivate clause; supply the
/// copied type.
/// \param DestExprs, SrcExprs DeclRefExprs naming the helper destination and
/// source variables used by the assignment operations.
/// \return The generated function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen-1*)Dst[n-1] = *(Typen-1*)Src[n-1];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region, including the copyprivate broadcast if
/// \p CopyprivateVars is non-empty.
///
/// \p SingleOpGen runs only when __kmpc_single returns non-zero and is
/// followed by __kmpc_end_single. With copyprivate variables, a "did_it" flag
/// records whether this thread executed the region, and afterwards
/// __kmpc_copyprivate is called with a list of the variables' addresses and a
/// generated copy function. Does nothing if \p CGF has no insertion point.
///
/// \param CopyprivateVars, SrcExprs, DstExprs, AssignmentOps Parallel arrays
/// of equal length (asserted).
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to copy; its validity is what
  // enables the copyprivate code paths below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  // Still inside the conditional block opened by the action: only the thread
  // that ran the region sets the flag.
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed in the helper's DestExprs position and
    // DstExprs in its SrcExprs position - confirm against the callers of
    // emitSingleRegion that this ordering is intended.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

/// Emit an 'ordered' region. When \p IsThreads is true, \p OrderedOpGen is
/// bracketed by __kmpc_ordered and __kmpc_end_ordered; otherwise the region is
/// emitted inline without runtime calls. Does nothing if \p CGF has no
/// insertion point.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the ident flags used for its barrier:
/// dedicated OMP_IDENT_BARRIER_IMPL_* values for 'for', 'sections' and
/// 'single', OMP_IDENT_BARRIER_EXPL for 'barrier', and OMP_IDENT_BARRIER_IMPL
/// for every other kind.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Choose the default schedule for the loop directive \p S.
///
/// If any 'ordered' clause of \p S has a non-zero loop count, \p ScheduleKind
/// is set to static and \p ChunkExpr to a 32-bit unsigned literal 1. In every
/// other case both output parameters are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value
*Result = CGF.EmitRuntimeCall( 2535 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), 2536 OMPRTL___kmpc_cancel_barrier), 2537 Args); 2538 if (EmitChecks) { 2539 // if (__kmpc_cancel_barrier()) { 2540 // exit from construct; 2541 // } 2542 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 2543 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 2544 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 2545 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 2546 CGF.EmitBlock(ExitBB); 2547 // exit from construct; 2548 CodeGenFunction::JumpDest CancelDestination = 2549 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 2550 CGF.EmitBranchThroughCleanup(CancelDestination); 2551 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 2552 } 2553 return; 2554 } 2555 } 2556 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2557 CGM.getModule(), OMPRTL___kmpc_barrier), 2558 Args); 2559 } 2560 2561 /// Map the OpenMP loop schedule to the runtime enumeration. 2562 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 2563 bool Chunked, bool Ordered) { 2564 switch (ScheduleKind) { 2565 case OMPC_SCHEDULE_static: 2566 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 2567 : (Ordered ? OMP_ord_static : OMP_sch_static); 2568 case OMPC_SCHEDULE_dynamic: 2569 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 2570 case OMPC_SCHEDULE_guided: 2571 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 2572 case OMPC_SCHEDULE_runtime: 2573 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 2574 case OMPC_SCHEDULE_auto: 2575 return Ordered ? OMP_ord_auto : OMP_sch_auto; 2576 case OMPC_SCHEDULE_unknown: 2577 assert(!Chunked && "chunk was specified but schedule kind not known"); 2578 return Ordered ? 
OMP_ord_static : OMP_sch_static; 2579 } 2580 llvm_unreachable("Unexpected runtime schedule"); 2581 } 2582 2583 /// Map the OpenMP distribute schedule to the runtime enumeration. 2584 static OpenMPSchedType 2585 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 2586 // only static is allowed for dist_schedule 2587 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 2588 } 2589 2590 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 2591 bool Chunked) const { 2592 OpenMPSchedType Schedule = 2593 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2594 return Schedule == OMP_sch_static; 2595 } 2596 2597 bool CGOpenMPRuntime::isStaticNonchunked( 2598 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2599 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2600 return Schedule == OMP_dist_sch_static; 2601 } 2602 2603 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 2604 bool Chunked) const { 2605 OpenMPSchedType Schedule = 2606 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 2607 return Schedule == OMP_sch_static_chunked; 2608 } 2609 2610 bool CGOpenMPRuntime::isStaticChunked( 2611 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 2612 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 2613 return Schedule == OMP_dist_sch_static_chunked; 2614 } 2615 2616 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 2617 OpenMPSchedType Schedule = 2618 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 2619 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 2620 return Schedule != OMP_sch_static; 2621 } 2622 2623 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 2624 OpenMPScheduleClauseModifier M1, 2625 OpenMPScheduleClauseModifier M2) { 2626 int Modifier = 0; 2627 switch (M1) { 2628 case 
OMPC_SCHEDULE_MODIFIER_monotonic: 2629 Modifier = OMP_sch_modifier_monotonic; 2630 break; 2631 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2632 Modifier = OMP_sch_modifier_nonmonotonic; 2633 break; 2634 case OMPC_SCHEDULE_MODIFIER_simd: 2635 if (Schedule == OMP_sch_static_chunked) 2636 Schedule = OMP_sch_static_balanced_chunked; 2637 break; 2638 case OMPC_SCHEDULE_MODIFIER_last: 2639 case OMPC_SCHEDULE_MODIFIER_unknown: 2640 break; 2641 } 2642 switch (M2) { 2643 case OMPC_SCHEDULE_MODIFIER_monotonic: 2644 Modifier = OMP_sch_modifier_monotonic; 2645 break; 2646 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 2647 Modifier = OMP_sch_modifier_nonmonotonic; 2648 break; 2649 case OMPC_SCHEDULE_MODIFIER_simd: 2650 if (Schedule == OMP_sch_static_chunked) 2651 Schedule = OMP_sch_static_balanced_chunked; 2652 break; 2653 case OMPC_SCHEDULE_MODIFIER_last: 2654 case OMPC_SCHEDULE_MODIFIER_unknown: 2655 break; 2656 } 2657 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 2658 // If the static schedule kind is specified or if the ordered clause is 2659 // specified, and if the nonmonotonic modifier is not specified, the effect is 2660 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 2661 // modifier is specified, the effect is as if the nonmonotonic modifier is 2662 // specified. 
2663 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 2664 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 2665 Schedule == OMP_sch_static_balanced_chunked || 2666 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 2667 Schedule == OMP_dist_sch_static_chunked || 2668 Schedule == OMP_dist_sch_static)) 2669 Modifier = OMP_sch_modifier_nonmonotonic; 2670 } 2671 return Schedule | Modifier; 2672 } 2673 2674 void CGOpenMPRuntime::emitForDispatchInit( 2675 CodeGenFunction &CGF, SourceLocation Loc, 2676 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 2677 bool Ordered, const DispatchRTInput &DispatchValues) { 2678 if (!CGF.HaveInsertPoint()) 2679 return; 2680 OpenMPSchedType Schedule = getRuntimeSchedule( 2681 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 2682 assert(Ordered || 2683 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 2684 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 2685 Schedule != OMP_sch_static_balanced_chunked)); 2686 // Call __kmpc_dispatch_init( 2687 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 2688 // kmp_int[32|64] lower, kmp_int[32|64] upper, 2689 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 2690 2691 // If the Chunk was not specified in the clause - use default value 1. 2692 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 2693 : CGF.Builder.getIntN(IVSize, 1); 2694 llvm::Value *Args[] = { 2695 emitUpdateLocation(CGF, Loc), 2696 getThreadID(CGF, Loc), 2697 CGF.Builder.getInt32(addMonoNonMonoModifier( 2698 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 2699 DispatchValues.LB, // Lower 2700 DispatchValues.UB, // Upper 2701 CGF.Builder.getIntN(IVSize, 1), // Stride 2702 Chunk // Chunk 2703 }; 2704 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 2705 } 2706 2707 static void emitForStaticInitCall( 2708 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 2709 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 2710 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 2711 const CGOpenMPRuntime::StaticRTInput &Values) { 2712 if (!CGF.HaveInsertPoint()) 2713 return; 2714 2715 assert(!Values.Ordered); 2716 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 2717 Schedule == OMP_sch_static_balanced_chunked || 2718 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 2719 Schedule == OMP_dist_sch_static || 2720 Schedule == OMP_dist_sch_static_chunked); 2721 2722 // Call __kmpc_for_static_init( 2723 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 2724 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 2725 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 2726 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 2727 llvm::Value *Chunk = Values.Chunk; 2728 if (Chunk == nullptr) { 2729 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 2730 Schedule == OMP_dist_sch_static) && 2731 "expected static non-chunked schedule"); 2732 // If the Chunk was not specified in the clause - use default value 1. 
2733 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 2734 } else { 2735 assert((Schedule == OMP_sch_static_chunked || 2736 Schedule == OMP_sch_static_balanced_chunked || 2737 Schedule == OMP_ord_static_chunked || 2738 Schedule == OMP_dist_sch_static_chunked) && 2739 "expected static chunked schedule"); 2740 } 2741 llvm::Value *Args[] = { 2742 UpdateLocation, 2743 ThreadId, 2744 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 2745 M2)), // Schedule type 2746 Values.IL.getPointer(), // &isLastIter 2747 Values.LB.getPointer(), // &LB 2748 Values.UB.getPointer(), // &UB 2749 Values.ST.getPointer(), // &Stride 2750 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 2751 Chunk // Chunk 2752 }; 2753 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 2754 } 2755 2756 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 2757 SourceLocation Loc, 2758 OpenMPDirectiveKind DKind, 2759 const OpenMPScheduleTy &ScheduleKind, 2760 const StaticRTInput &Values) { 2761 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 2762 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 2763 assert(isOpenMPWorksharingDirective(DKind) && 2764 "Expected loop-based or sections-based directive."); 2765 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 2766 isOpenMPLoopDirective(DKind) 2767 ? 
OMP_IDENT_WORK_LOOP 2768 : OMP_IDENT_WORK_SECTIONS); 2769 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2770 llvm::FunctionCallee StaticInitFunction = 2771 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2772 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2773 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2774 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 2775 } 2776 2777 void CGOpenMPRuntime::emitDistributeStaticInit( 2778 CodeGenFunction &CGF, SourceLocation Loc, 2779 OpenMPDistScheduleClauseKind SchedKind, 2780 const CGOpenMPRuntime::StaticRTInput &Values) { 2781 OpenMPSchedType ScheduleNum = 2782 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 2783 llvm::Value *UpdatedLocation = 2784 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 2785 llvm::Value *ThreadId = getThreadID(CGF, Loc); 2786 llvm::FunctionCallee StaticInitFunction = 2787 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 2788 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 2789 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 2790 OMPC_SCHEDULE_MODIFIER_unknown, Values); 2791 } 2792 2793 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 2794 SourceLocation Loc, 2795 OpenMPDirectiveKind DKind) { 2796 if (!CGF.HaveInsertPoint()) 2797 return; 2798 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 2799 llvm::Value *Args[] = { 2800 emitUpdateLocation(CGF, Loc, 2801 isOpenMPDistributeDirective(DKind) 2802 ? OMP_IDENT_WORK_DISTRIBUTE 2803 : isOpenMPLoopDirective(DKind) 2804 ? 
OMP_IDENT_WORK_LOOP 2805 : OMP_IDENT_WORK_SECTIONS), 2806 getThreadID(CGF, Loc)}; 2807 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 2808 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2809 CGM.getModule(), OMPRTL___kmpc_for_static_fini), 2810 Args); 2811 } 2812 2813 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 2814 SourceLocation Loc, 2815 unsigned IVSize, 2816 bool IVSigned) { 2817 if (!CGF.HaveInsertPoint()) 2818 return; 2819 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 2820 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 2821 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 2822 } 2823 2824 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 2825 SourceLocation Loc, unsigned IVSize, 2826 bool IVSigned, Address IL, 2827 Address LB, Address UB, 2828 Address ST) { 2829 // Call __kmpc_dispatch_next( 2830 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 2831 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 2832 // kmp_int[32|64] *p_stride); 2833 llvm::Value *Args[] = { 2834 emitUpdateLocation(CGF, Loc), 2835 getThreadID(CGF, Loc), 2836 IL.getPointer(), // &isLastIter 2837 LB.getPointer(), // &Lower 2838 UB.getPointer(), // &Upper 2839 ST.getPointer() // &Stride 2840 }; 2841 llvm::Value *Call = 2842 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 2843 return CGF.EmitScalarConversion( 2844 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 2845 CGF.getContext().BoolTy, Loc); 2846 } 2847 2848 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 2849 llvm::Value *NumThreads, 2850 SourceLocation Loc) { 2851 if (!CGF.HaveInsertPoint()) 2852 return; 2853 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 2854 llvm::Value *Args[] = { 2855 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2856 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 
/*isSigned*/ true)}; 2857 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2858 CGM.getModule(), OMPRTL___kmpc_push_num_threads), 2859 Args); 2860 } 2861 2862 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 2863 ProcBindKind ProcBind, 2864 SourceLocation Loc) { 2865 if (!CGF.HaveInsertPoint()) 2866 return; 2867 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 2868 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 2869 llvm::Value *Args[] = { 2870 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2871 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 2872 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2873 CGM.getModule(), OMPRTL___kmpc_push_proc_bind), 2874 Args); 2875 } 2876 2877 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 2878 SourceLocation Loc, llvm::AtomicOrdering AO) { 2879 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 2880 OMPBuilder.createFlush(CGF.Builder); 2881 } else { 2882 if (!CGF.HaveInsertPoint()) 2883 return; 2884 // Build call void __kmpc_flush(ident_t *loc) 2885 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 2886 CGM.getModule(), OMPRTL___kmpc_flush), 2887 emitUpdateLocation(CGF, Loc)); 2888 } 2889 } 2890 2891 namespace { 2892 /// Indexes of fields for type kmp_task_t. 2893 enum KmpTaskTFields { 2894 /// List of shared variables. 2895 KmpTaskTShareds, 2896 /// Task routine. 2897 KmpTaskTRoutine, 2898 /// Partition id for the untied tasks. 2899 KmpTaskTPartId, 2900 /// Function with call of destructors for private variables. 2901 Data1, 2902 /// Task priority. 2903 Data2, 2904 /// (Taskloops only) Lower bound. 2905 KmpTaskTLowerBound, 2906 /// (Taskloops only) Upper bound. 2907 KmpTaskTUpperBound, 2908 /// (Taskloops only) Stride. 2909 KmpTaskTStride, 2910 /// (Taskloops only) Is last iteration flag. 2911 KmpTaskTLastIter, 2912 /// (Taskloops only) Reduction data. 
2913 KmpTaskTReductions, 2914 }; 2915 } // anonymous namespace 2916 2917 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 2918 return OffloadEntriesTargetRegion.empty() && 2919 OffloadEntriesDeviceGlobalVar.empty(); 2920 } 2921 2922 /// Initialize target region entry. 2923 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2924 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2925 StringRef ParentName, unsigned LineNum, 2926 unsigned Order) { 2927 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 2928 "only required for the device " 2929 "code generation."); 2930 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 2931 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 2932 OMPTargetRegionEntryTargetRegion); 2933 ++OffloadingEntriesNum; 2934 } 2935 2936 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 2937 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 2938 StringRef ParentName, unsigned LineNum, 2939 llvm::Constant *Addr, llvm::Constant *ID, 2940 OMPTargetRegionEntryKind Flags) { 2941 // If we are emitting code for a target, the entry is already initialized, 2942 // only has to be registered. 
2943 if (CGM.getLangOpts().OpenMPIsDevice) { 2944 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 2945 unsigned DiagID = CGM.getDiags().getCustomDiagID( 2946 DiagnosticsEngine::Error, 2947 "Unable to find target region on line '%0' in the device code."); 2948 CGM.getDiags().Report(DiagID) << LineNum; 2949 return; 2950 } 2951 auto &Entry = 2952 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 2953 assert(Entry.isValid() && "Entry not initialized!"); 2954 Entry.setAddress(Addr); 2955 Entry.setID(ID); 2956 Entry.setFlags(Flags); 2957 } else { 2958 if (Flags == 2959 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && 2960 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, 2961 /*IgnoreAddressId*/ true)) 2962 return; 2963 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && 2964 "Target region entry already registered!"); 2965 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 2966 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 2967 ++OffloadingEntriesNum; 2968 } 2969 } 2970 2971 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 2972 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, 2973 bool IgnoreAddressId) const { 2974 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 2975 if (PerDevice == OffloadEntriesTargetRegion.end()) 2976 return false; 2977 auto PerFile = PerDevice->second.find(FileID); 2978 if (PerFile == PerDevice->second.end()) 2979 return false; 2980 auto PerParentName = PerFile->second.find(ParentName); 2981 if (PerParentName == PerFile->second.end()) 2982 return false; 2983 auto PerLine = PerParentName->second.find(LineNum); 2984 if (PerLine == PerParentName->second.end()) 2985 return false; 2986 // Fail if this entry is already registered. 
2987 if (!IgnoreAddressId && 2988 (PerLine->second.getAddress() || PerLine->second.getID())) 2989 return false; 2990 return true; 2991 } 2992 2993 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 2994 const OffloadTargetRegionEntryInfoActTy &Action) { 2995 // Scan all target region entries and perform the provided action. 2996 for (const auto &D : OffloadEntriesTargetRegion) 2997 for (const auto &F : D.second) 2998 for (const auto &P : F.second) 2999 for (const auto &L : P.second) 3000 Action(D.first, F.first, P.first(), L.first, L.second); 3001 } 3002 3003 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3004 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3005 OMPTargetGlobalVarEntryKind Flags, 3006 unsigned Order) { 3007 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3008 "only required for the device " 3009 "code generation."); 3010 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3011 ++OffloadingEntriesNum; 3012 } 3013 3014 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3015 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3016 CharUnits VarSize, 3017 OMPTargetGlobalVarEntryKind Flags, 3018 llvm::GlobalValue::LinkageTypes Linkage) { 3019 if (CGM.getLangOpts().OpenMPIsDevice) { 3020 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3021 assert(Entry.isValid() && Entry.getFlags() == Flags && 3022 "Entry not initialized!"); 3023 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3024 "Resetting with the new address."); 3025 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3026 if (Entry.getVarSize().isZero()) { 3027 Entry.setVarSize(VarSize); 3028 Entry.setLinkage(Linkage); 3029 } 3030 return; 3031 } 3032 Entry.setVarSize(VarSize); 3033 Entry.setLinkage(Linkage); 3034 Entry.setAddress(Addr); 3035 } else { 3036 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3037 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3038 
assert(Entry.isValid() && Entry.getFlags() == Flags && 3039 "Entry not initialized!"); 3040 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3041 "Resetting with the new address."); 3042 if (Entry.getVarSize().isZero()) { 3043 Entry.setVarSize(VarSize); 3044 Entry.setLinkage(Linkage); 3045 } 3046 return; 3047 } 3048 OffloadEntriesDeviceGlobalVar.try_emplace( 3049 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3050 ++OffloadingEntriesNum; 3051 } 3052 } 3053 3054 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3055 actOnDeviceGlobalVarEntriesInfo( 3056 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3057 // Scan all target region entries and perform the provided action. 3058 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3059 Action(E.getKey(), E.getValue()); 3060 } 3061 3062 void CGOpenMPRuntime::createOffloadEntry( 3063 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 3064 llvm::GlobalValue::LinkageTypes Linkage) { 3065 StringRef Name = Addr->getName(); 3066 llvm::Module &M = CGM.getModule(); 3067 llvm::LLVMContext &C = M.getContext(); 3068 3069 // Create constant string with the name. 
3070 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 3071 3072 std::string StringName = getName({"omp_offloading", "entry_name"}); 3073 auto *Str = new llvm::GlobalVariable( 3074 M, StrPtrInit->getType(), /*isConstant=*/true, 3075 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 3076 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3077 3078 llvm::Constant *Data[] = { 3079 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), 3080 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), 3081 llvm::ConstantInt::get(CGM.SizeTy, Size), 3082 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 3083 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 3084 std::string EntryName = getName({"omp_offloading", "entry", ""}); 3085 llvm::GlobalVariable *Entry = createGlobalStruct( 3086 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 3087 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 3088 3089 // The entry has to be created in the section the linker expects it to be. 3090 Entry->setSection("omp_offloading_entries"); 3091 } 3092 3093 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 3094 // Emit the offloading entries and metadata so that the device codegen side 3095 // can easily figure out what to emit. The produced metadata looks like 3096 // this: 3097 // 3098 // !omp_offload.info = !{!1, ...} 3099 // 3100 // Right now we only generate metadata for function that contain target 3101 // regions. 3102 3103 // If we are in simd mode or there are no entries, we don't need to do 3104 // anything. 
3105 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 3106 return; 3107 3108 llvm::Module &M = CGM.getModule(); 3109 llvm::LLVMContext &C = M.getContext(); 3110 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 3111 SourceLocation, StringRef>, 3112 16> 3113 OrderedEntries(OffloadEntriesInfoManager.size()); 3114 llvm::SmallVector<StringRef, 16> ParentFunctions( 3115 OffloadEntriesInfoManager.size()); 3116 3117 // Auxiliary methods to create metadata values and strings. 3118 auto &&GetMDInt = [this](unsigned V) { 3119 return llvm::ConstantAsMetadata::get( 3120 llvm::ConstantInt::get(CGM.Int32Ty, V)); 3121 }; 3122 3123 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 3124 3125 // Create the offloading info metadata node. 3126 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 3127 3128 // Create function that emits metadata for each target region entry; 3129 auto &&TargetRegionMetadataEmitter = 3130 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 3131 &GetMDString]( 3132 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3133 unsigned Line, 3134 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 3135 // Generate metadata for target regions. Each entry of this metadata 3136 // contains: 3137 // - Entry 0 -> Kind of this type of metadata (0). 3138 // - Entry 1 -> Device ID of the file where the entry was identified. 3139 // - Entry 2 -> File ID of the file where the entry was identified. 3140 // - Entry 3 -> Mangled name of the function where the entry was 3141 // identified. 3142 // - Entry 4 -> Line in the file where the entry was identified. 3143 // - Entry 5 -> Order the entry was created. 3144 // The first element of the metadata node is the kind. 
3145 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 3146 GetMDInt(FileID), GetMDString(ParentName), 3147 GetMDInt(Line), GetMDInt(E.getOrder())}; 3148 3149 SourceLocation Loc; 3150 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 3151 E = CGM.getContext().getSourceManager().fileinfo_end(); 3152 I != E; ++I) { 3153 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 3154 I->getFirst()->getUniqueID().getFile() == FileID) { 3155 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 3156 I->getFirst(), Line, 1); 3157 break; 3158 } 3159 } 3160 // Save this entry in the right position of the ordered entries array. 3161 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 3162 ParentFunctions[E.getOrder()] = ParentName; 3163 3164 // Add metadata to the named metadata node. 3165 MD->addOperand(llvm::MDNode::get(C, Ops)); 3166 }; 3167 3168 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 3169 TargetRegionMetadataEmitter); 3170 3171 // Create function that emits metadata for each device global variable entry; 3172 auto &&DeviceGlobalVarMetadataEmitter = 3173 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 3174 MD](StringRef MangledName, 3175 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 3176 &E) { 3177 // Generate metadata for global variables. Each entry of this metadata 3178 // contains: 3179 // - Entry 0 -> Kind of this type of metadata (1). 3180 // - Entry 1 -> Mangled name of the variable. 3181 // - Entry 2 -> Declare target kind. 3182 // - Entry 3 -> Order the entry was created. 3183 // The first element of the metadata node is the kind. 3184 llvm::Metadata *Ops[] = { 3185 GetMDInt(E.getKind()), GetMDString(MangledName), 3186 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 3187 3188 // Save this entry in the right position of the ordered entries array. 
3189 OrderedEntries[E.getOrder()] = 3190 std::make_tuple(&E, SourceLocation(), MangledName); 3191 3192 // Add metadata to the named metadata node. 3193 MD->addOperand(llvm::MDNode::get(C, Ops)); 3194 }; 3195 3196 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 3197 DeviceGlobalVarMetadataEmitter); 3198 3199 for (const auto &E : OrderedEntries) { 3200 assert(std::get<0>(E) && "All ordered entries must exist!"); 3201 if (const auto *CE = 3202 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 3203 std::get<0>(E))) { 3204 if (!CE->getID() || !CE->getAddress()) { 3205 // Do not blame the entry if the parent funtion is not emitted. 3206 StringRef FnName = ParentFunctions[CE->getOrder()]; 3207 if (!CGM.GetGlobalValue(FnName)) 3208 continue; 3209 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3210 DiagnosticsEngine::Error, 3211 "Offloading entry for target region in %0 is incorrect: either the " 3212 "address or the ID is invalid."); 3213 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 3214 continue; 3215 } 3216 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 3217 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 3218 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 3219 OffloadEntryInfoDeviceGlobalVar>( 3220 std::get<0>(E))) { 3221 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 3222 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3223 CE->getFlags()); 3224 switch (Flags) { 3225 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 3226 if (CGM.getLangOpts().OpenMPIsDevice && 3227 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 3228 continue; 3229 if (!CE->getAddress()) { 3230 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3231 DiagnosticsEngine::Error, "Offloading entry for declare target " 3232 "variable %0 is incorrect: the " 3233 "address is invalid."); 3234 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 3235 
continue; 3236 } 3237 // The vaiable has no definition - no need to add the entry. 3238 if (CE->getVarSize().isZero()) 3239 continue; 3240 break; 3241 } 3242 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 3243 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 3244 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 3245 "Declaret target link address is set."); 3246 if (CGM.getLangOpts().OpenMPIsDevice) 3247 continue; 3248 if (!CE->getAddress()) { 3249 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3250 DiagnosticsEngine::Error, 3251 "Offloading entry for declare target variable is incorrect: the " 3252 "address is invalid."); 3253 CGM.getDiags().Report(DiagID); 3254 continue; 3255 } 3256 break; 3257 } 3258 createOffloadEntry(CE->getAddress(), CE->getAddress(), 3259 CE->getVarSize().getQuantity(), Flags, 3260 CE->getLinkage()); 3261 } else { 3262 llvm_unreachable("Unsupported entry kind."); 3263 } 3264 } 3265 } 3266 3267 /// Loads all the offload entries information from the host IR 3268 /// metadata. 3269 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 3270 // If we are in target mode, load the metadata from the host IR. This code has 3271 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
3272 3273 if (!CGM.getLangOpts().OpenMPIsDevice) 3274 return; 3275 3276 if (CGM.getLangOpts().OMPHostIRFile.empty()) 3277 return; 3278 3279 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 3280 if (auto EC = Buf.getError()) { 3281 CGM.getDiags().Report(diag::err_cannot_open_file) 3282 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3283 return; 3284 } 3285 3286 llvm::LLVMContext C; 3287 auto ME = expectedToErrorOrAndEmitErrors( 3288 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 3289 3290 if (auto EC = ME.getError()) { 3291 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3292 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 3293 CGM.getDiags().Report(DiagID) 3294 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 3295 return; 3296 } 3297 3298 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 3299 if (!MD) 3300 return; 3301 3302 for (llvm::MDNode *MN : MD->operands()) { 3303 auto &&GetMDInt = [MN](unsigned Idx) { 3304 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 3305 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 3306 }; 3307 3308 auto &&GetMDString = [MN](unsigned Idx) { 3309 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 3310 return V->getString(); 3311 }; 3312 3313 switch (GetMDInt(0)) { 3314 default: 3315 llvm_unreachable("Unexpected metadata!"); 3316 break; 3317 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3318 OffloadingEntryInfoTargetRegion: 3319 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 3320 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 3321 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 3322 /*Order=*/GetMDInt(5)); 3323 break; 3324 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 3325 OffloadingEntryInfoDeviceGlobalVar: 3326 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 3327 /*MangledName=*/GetMDString(1), 3328 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 3329 /*Flags=*/GetMDInt(2)), 3330 /*Order=*/GetMDInt(3)); 3331 break; 3332 } 3333 } 3334 } 3335 3336 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 3337 if (!KmpRoutineEntryPtrTy) { 3338 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 3339 ASTContext &C = CGM.getContext(); 3340 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 3341 FunctionProtoType::ExtProtoInfo EPI; 3342 KmpRoutineEntryPtrQTy = C.getPointerType( 3343 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 3344 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 3345 } 3346 } 3347 3348 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 3349 // Make sure the type of the entry is already created. This is the type we 3350 // have to create: 3351 // struct __tgt_offload_entry{ 3352 // void *addr; // Pointer to the offload entry info. 3353 // // (function or global) 3354 // char *name; // Name of the function or global. 3355 // size_t size; // Size of the entry info (0 if it a function). 3356 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 3357 // int32_t reserved; // Reserved, to use by the runtime library. 
3358 // }; 3359 if (TgtOffloadEntryQTy.isNull()) { 3360 ASTContext &C = CGM.getContext(); 3361 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 3362 RD->startDefinition(); 3363 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3364 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 3365 addFieldToRecordDecl(C, RD, C.getSizeType()); 3366 addFieldToRecordDecl( 3367 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3368 addFieldToRecordDecl( 3369 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 3370 RD->completeDefinition(); 3371 RD->addAttr(PackedAttr::CreateImplicit(C)); 3372 TgtOffloadEntryQTy = C.getRecordType(RD); 3373 } 3374 return TgtOffloadEntryQTy; 3375 } 3376 3377 namespace { 3378 struct PrivateHelpersTy { 3379 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 3380 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 3381 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 3382 PrivateElemInit(PrivateElemInit) {} 3383 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} 3384 const Expr *OriginalRef = nullptr; 3385 const VarDecl *Original = nullptr; 3386 const VarDecl *PrivateCopy = nullptr; 3387 const VarDecl *PrivateElemInit = nullptr; 3388 bool isLocalPrivate() const { 3389 return !OriginalRef && !PrivateCopy && !PrivateElemInit; 3390 } 3391 }; 3392 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 3393 } // anonymous namespace 3394 3395 static bool isAllocatableDecl(const VarDecl *VD) { 3396 const VarDecl *CVD = VD->getCanonicalDecl(); 3397 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 3398 return false; 3399 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 3400 // Use the default allocation. 
3401 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 3402 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 3403 !AA->getAllocator()); 3404 } 3405 3406 static RecordDecl * 3407 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 3408 if (!Privates.empty()) { 3409 ASTContext &C = CGM.getContext(); 3410 // Build struct .kmp_privates_t. { 3411 // /* private vars */ 3412 // }; 3413 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 3414 RD->startDefinition(); 3415 for (const auto &Pair : Privates) { 3416 const VarDecl *VD = Pair.second.Original; 3417 QualType Type = VD->getType().getNonReferenceType(); 3418 // If the private variable is a local variable with lvalue ref type, 3419 // allocate the pointer instead of the pointee type. 3420 if (Pair.second.isLocalPrivate()) { 3421 if (VD->getType()->isLValueReferenceType()) 3422 Type = C.getPointerType(Type); 3423 if (isAllocatableDecl(VD)) 3424 Type = C.getPointerType(Type); 3425 } 3426 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 3427 if (VD->hasAttrs()) { 3428 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 3429 E(VD->getAttrs().end()); 3430 I != E; ++I) 3431 FD->addAttr(*I); 3432 } 3433 } 3434 RD->completeDefinition(); 3435 return RD; 3436 } 3437 return nullptr; 3438 } 3439 3440 static RecordDecl * 3441 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 3442 QualType KmpInt32Ty, 3443 QualType KmpRoutineEntryPointerQTy) { 3444 ASTContext &C = CGM.getContext(); 3445 // Build struct kmp_task_t { 3446 // void * shareds; 3447 // kmp_routine_entry_t routine; 3448 // kmp_int32 part_id; 3449 // kmp_cmplrdata_t data1; 3450 // kmp_cmplrdata_t data2; 3451 // For taskloops additional fields: 3452 // kmp_uint64 lb; 3453 // kmp_uint64 ub; 3454 // kmp_int64 st; 3455 // kmp_int32 liter; 3456 // void * reductions; 3457 // }; 3458 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 3459 
UD->startDefinition(); 3460 addFieldToRecordDecl(C, UD, KmpInt32Ty); 3461 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 3462 UD->completeDefinition(); 3463 QualType KmpCmplrdataTy = C.getRecordType(UD); 3464 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 3465 RD->startDefinition(); 3466 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3467 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 3468 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3469 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3470 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 3471 if (isOpenMPTaskLoopDirective(Kind)) { 3472 QualType KmpUInt64Ty = 3473 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 3474 QualType KmpInt64Ty = 3475 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 3476 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3477 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 3478 addFieldToRecordDecl(C, RD, KmpInt64Ty); 3479 addFieldToRecordDecl(C, RD, KmpInt32Ty); 3480 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 3481 } 3482 RD->completeDefinition(); 3483 return RD; 3484 } 3485 3486 static RecordDecl * 3487 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 3488 ArrayRef<PrivateDataTy> Privates) { 3489 ASTContext &C = CGM.getContext(); 3490 // Build struct kmp_task_t_with_privates { 3491 // kmp_task_t task_data; 3492 // .kmp_privates_t. privates; 3493 // }; 3494 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 3495 RD->startDefinition(); 3496 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 3497 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 3498 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 3499 RD->completeDefinition(); 3500 return RD; 3501 } 3502 3503 /// Emit a proxy function which accepts kmp_task_t as the second 3504 /// argument. 
3505 /// \code 3506 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 3507 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 3508 /// For taskloops: 3509 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3510 /// tt->reductions, tt->shareds); 3511 /// return 0; 3512 /// } 3513 /// \endcode 3514 static llvm::Function * 3515 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 3516 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 3517 QualType KmpTaskTWithPrivatesPtrQTy, 3518 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 3519 QualType SharedsPtrTy, llvm::Function *TaskFunction, 3520 llvm::Value *TaskPrivatesMap) { 3521 ASTContext &C = CGM.getContext(); 3522 FunctionArgList Args; 3523 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3524 ImplicitParamDecl::Other); 3525 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3526 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3527 ImplicitParamDecl::Other); 3528 Args.push_back(&GtidArg); 3529 Args.push_back(&TaskTypeArg); 3530 const auto &TaskEntryFnInfo = 3531 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3532 llvm::FunctionType *TaskEntryTy = 3533 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 3534 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 3535 auto *TaskEntry = llvm::Function::Create( 3536 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3537 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 3538 TaskEntry->setDoesNotRecurse(); 3539 CodeGenFunction CGF(CGM); 3540 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 3541 Loc, Loc); 3542 3543 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 3544 // tt, 3545 // For taskloops: 3546 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 3547 // 
tt->task_data.shareds); 3548 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 3549 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 3550 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3551 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3552 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3553 const auto *KmpTaskTWithPrivatesQTyRD = 3554 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3555 LValue Base = 3556 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3557 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 3558 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 3559 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 3560 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 3561 3562 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 3563 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 3564 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3565 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 3566 CGF.ConvertTypeForMem(SharedsPtrTy)); 3567 3568 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 3569 llvm::Value *PrivatesParam; 3570 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 3571 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 3572 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3573 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 3574 } else { 3575 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 3576 } 3577 3578 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 3579 TaskPrivatesMap, 3580 CGF.Builder 3581 .CreatePointerBitCastOrAddrSpaceCast( 3582 TDBase.getAddress(CGF), CGF.VoidPtrTy) 3583 .getPointer()}; 3584 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 3585 std::end(CommonArgs)); 3586 if (isOpenMPTaskLoopDirective(Kind)) { 3587 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 3588 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 3589 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 3590 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 3591 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 3592 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 3593 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 3594 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 3595 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 3596 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3597 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3598 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 3599 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 3600 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 3601 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 3602 CallArgs.push_back(LBParam); 3603 CallArgs.push_back(UBParam); 3604 CallArgs.push_back(StParam); 3605 CallArgs.push_back(LIParam); 3606 CallArgs.push_back(RParam); 3607 } 3608 CallArgs.push_back(SharedsParam); 3609 3610 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 3611 CallArgs); 3612 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 3613 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 3614 CGF.FinishFunction(); 3615 return TaskEntry; 3616 } 3617 3618 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 3619 SourceLocation Loc, 3620 QualType KmpInt32Ty, 3621 QualType KmpTaskTWithPrivatesPtrQTy, 3622 QualType KmpTaskTWithPrivatesQTy) { 3623 ASTContext &C = CGM.getContext(); 3624 FunctionArgList Args; 3625 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 3626 ImplicitParamDecl::Other); 3627 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3628 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 3629 
ImplicitParamDecl::Other); 3630 Args.push_back(&GtidArg); 3631 Args.push_back(&TaskTypeArg); 3632 const auto &DestructorFnInfo = 3633 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 3634 llvm::FunctionType *DestructorFnTy = 3635 CGM.getTypes().GetFunctionType(DestructorFnInfo); 3636 std::string Name = 3637 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 3638 auto *DestructorFn = 3639 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 3640 Name, &CGM.getModule()); 3641 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 3642 DestructorFnInfo); 3643 DestructorFn->setDoesNotRecurse(); 3644 CodeGenFunction CGF(CGM); 3645 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 3646 Args, Loc, Loc); 3647 3648 LValue Base = CGF.EmitLoadOfPointerLValue( 3649 CGF.GetAddrOfLocalVar(&TaskTypeArg), 3650 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3651 const auto *KmpTaskTWithPrivatesQTyRD = 3652 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 3653 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3654 Base = CGF.EmitLValueForField(Base, *FI); 3655 for (const auto *Field : 3656 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 3657 if (QualType::DestructionKind DtorKind = 3658 Field->getType().isDestructedType()) { 3659 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 3660 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 3661 } 3662 } 3663 CGF.FinishFunction(); 3664 return DestructorFn; 3665 } 3666 3667 /// Emit a privates mapping function for correct handling of private and 3668 /// firstprivate variables. 3669 /// \code 3670 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 3671 /// **noalias priv1,..., <tyn> **noalias privn) { 3672 /// *priv1 = &.privates.priv1; 3673 /// ...; 3674 /// *privn = &.privates.privn; 3675 /// } 3676 /// \endcode 3677 static llvm::Value * 3678 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 3679 const OMPTaskDataTy &Data, QualType PrivatesQTy, 3680 ArrayRef<PrivateDataTy> Privates) { 3681 ASTContext &C = CGM.getContext(); 3682 FunctionArgList Args; 3683 ImplicitParamDecl TaskPrivatesArg( 3684 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3685 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 3686 ImplicitParamDecl::Other); 3687 Args.push_back(&TaskPrivatesArg); 3688 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; 3689 unsigned Counter = 1; 3690 for (const Expr *E : Data.PrivateVars) { 3691 Args.push_back(ImplicitParamDecl::Create( 3692 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3693 C.getPointerType(C.getPointerType(E->getType())) 3694 .withConst() 3695 .withRestrict(), 3696 ImplicitParamDecl::Other)); 3697 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3698 PrivateVarsPos[VD] = Counter; 3699 ++Counter; 3700 } 3701 for (const Expr *E : Data.FirstprivateVars) { 3702 Args.push_back(ImplicitParamDecl::Create( 3703 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3704 C.getPointerType(C.getPointerType(E->getType())) 3705 .withConst() 3706 .withRestrict(), 3707 ImplicitParamDecl::Other)); 3708 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3709 PrivateVarsPos[VD] = Counter; 3710 ++Counter; 3711 } 3712 for (const Expr *E : Data.LastprivateVars) { 3713 Args.push_back(ImplicitParamDecl::Create( 3714 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3715 C.getPointerType(C.getPointerType(E->getType())) 3716 .withConst() 3717 .withRestrict(), 3718 ImplicitParamDecl::Other)); 3719 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 3720 PrivateVarsPos[VD] = Counter; 3721 ++Counter; 3722 } 3723 for (const VarDecl *VD : 
Data.PrivateLocals) { 3724 QualType Ty = VD->getType().getNonReferenceType(); 3725 if (VD->getType()->isLValueReferenceType()) 3726 Ty = C.getPointerType(Ty); 3727 if (isAllocatableDecl(VD)) 3728 Ty = C.getPointerType(Ty); 3729 Args.push_back(ImplicitParamDecl::Create( 3730 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3731 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), 3732 ImplicitParamDecl::Other)); 3733 PrivateVarsPos[VD] = Counter; 3734 ++Counter; 3735 } 3736 const auto &TaskPrivatesMapFnInfo = 3737 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3738 llvm::FunctionType *TaskPrivatesMapTy = 3739 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 3740 std::string Name = 3741 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 3742 auto *TaskPrivatesMap = llvm::Function::Create( 3743 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 3744 &CGM.getModule()); 3745 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 3746 TaskPrivatesMapFnInfo); 3747 if (CGM.getLangOpts().Optimize) { 3748 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 3749 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 3750 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 3751 } 3752 CodeGenFunction CGF(CGM); 3753 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 3754 TaskPrivatesMapFnInfo, Args, Loc, Loc); 3755 3756 // *privi = &.privates.privi; 3757 LValue Base = CGF.EmitLoadOfPointerLValue( 3758 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 3759 TaskPrivatesArg.getType()->castAs<PointerType>()); 3760 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 3761 Counter = 0; 3762 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 3763 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 3764 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 3765 LValue RefLVal = 3766 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 3767 
LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 3768 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 3769 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 3770 ++Counter; 3771 } 3772 CGF.FinishFunction(); 3773 return TaskPrivatesMap; 3774 } 3775 3776 /// Emit initialization for private variables in task-based directives. 3777 static void emitPrivatesInit(CodeGenFunction &CGF, 3778 const OMPExecutableDirective &D, 3779 Address KmpTaskSharedsPtr, LValue TDBase, 3780 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3781 QualType SharedsTy, QualType SharedsPtrTy, 3782 const OMPTaskDataTy &Data, 3783 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 3784 ASTContext &C = CGF.getContext(); 3785 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 3786 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 3787 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 3788 ? OMPD_taskloop 3789 : OMPD_task; 3790 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 3791 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 3792 LValue SrcBase; 3793 bool IsTargetTask = 3794 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 3795 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 3796 // For target-based directives skip 4 firstprivate arrays BasePointersArray, 3797 // PointersArray, SizesArray, and MappersArray. The original variables for 3798 // these arrays are not captured and we get their addresses explicitly. 3799 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 3800 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 3801 SrcBase = CGF.MakeAddrLValue( 3802 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3803 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 3804 SharedsTy); 3805 } 3806 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 3807 for (const PrivateDataTy &Pair : Privates) { 3808 // Do not initialize private locals. 
3809 if (Pair.second.isLocalPrivate()) { 3810 ++FI; 3811 continue; 3812 } 3813 const VarDecl *VD = Pair.second.PrivateCopy; 3814 const Expr *Init = VD->getAnyInitializer(); 3815 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 3816 !CGF.isTrivialInitializer(Init)))) { 3817 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 3818 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 3819 const VarDecl *OriginalVD = Pair.second.Original; 3820 // Check if the variable is the target-based BasePointersArray, 3821 // PointersArray, SizesArray, or MappersArray. 3822 LValue SharedRefLValue; 3823 QualType Type = PrivateLValue.getType(); 3824 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 3825 if (IsTargetTask && !SharedField) { 3826 assert(isa<ImplicitParamDecl>(OriginalVD) && 3827 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 3828 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3829 ->getNumParams() == 0 && 3830 isa<TranslationUnitDecl>( 3831 cast<CapturedDecl>(OriginalVD->getDeclContext()) 3832 ->getDeclContext()) && 3833 "Expected artificial target data variable."); 3834 SharedRefLValue = 3835 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 3836 } else if (ForDup) { 3837 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 3838 SharedRefLValue = CGF.MakeAddrLValue( 3839 Address(SharedRefLValue.getPointer(CGF), 3840 C.getDeclAlign(OriginalVD)), 3841 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 3842 SharedRefLValue.getTBAAInfo()); 3843 } else if (CGF.LambdaCaptureFields.count( 3844 Pair.second.Original->getCanonicalDecl()) > 0 || 3845 dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 3846 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3847 } else { 3848 // Processing for implicitly captured variables. 
3849 InlinedOpenMPRegionRAII Region( 3850 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 3851 /*HasCancel=*/false); 3852 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 3853 } 3854 if (Type->isArrayType()) { 3855 // Initialize firstprivate array. 3856 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 3857 // Perform simple memcpy. 3858 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 3859 } else { 3860 // Initialize firstprivate array using element-by-element 3861 // initialization. 3862 CGF.EmitOMPAggregateAssign( 3863 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 3864 Type, 3865 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 3866 Address SrcElement) { 3867 // Clean up any temporaries needed by the initialization. 3868 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3869 InitScope.addPrivate( 3870 Elem, [SrcElement]() -> Address { return SrcElement; }); 3871 (void)InitScope.Privatize(); 3872 // Emit initialization for single element. 3873 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 3874 CGF, &CapturesInfo); 3875 CGF.EmitAnyExprToMem(Init, DestElement, 3876 Init->getType().getQualifiers(), 3877 /*IsInitializer=*/false); 3878 }); 3879 } 3880 } else { 3881 CodeGenFunction::OMPPrivateScope InitScope(CGF); 3882 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 3883 return SharedRefLValue.getAddress(CGF); 3884 }); 3885 (void)InitScope.Privatize(); 3886 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 3887 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 3888 /*capturedByInit=*/false); 3889 } 3890 } else { 3891 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 3892 } 3893 } 3894 ++FI; 3895 } 3896 } 3897 3898 /// Check if duplication function is required for taskloops. 
3899 static bool checkInitIsRequired(CodeGenFunction &CGF, 3900 ArrayRef<PrivateDataTy> Privates) { 3901 bool InitRequired = false; 3902 for (const PrivateDataTy &Pair : Privates) { 3903 if (Pair.second.isLocalPrivate()) 3904 continue; 3905 const VarDecl *VD = Pair.second.PrivateCopy; 3906 const Expr *Init = VD->getAnyInitializer(); 3907 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 3908 !CGF.isTrivialInitializer(Init)); 3909 if (InitRequired) 3910 break; 3911 } 3912 return InitRequired; 3913 } 3914 3915 3916 /// Emit task_dup function (for initialization of 3917 /// private/firstprivate/lastprivate vars and last_iter flag) 3918 /// \code 3919 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 3920 /// lastpriv) { 3921 /// // setup lastprivate flag 3922 /// task_dst->last = lastpriv; 3923 /// // could be constructor calls here... 3924 /// } 3925 /// \endcode 3926 static llvm::Value * 3927 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 3928 const OMPExecutableDirective &D, 3929 QualType KmpTaskTWithPrivatesPtrQTy, 3930 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3931 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 3932 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 3933 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 3934 ASTContext &C = CGM.getContext(); 3935 FunctionArgList Args; 3936 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3937 KmpTaskTWithPrivatesPtrQTy, 3938 ImplicitParamDecl::Other); 3939 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 3940 KmpTaskTWithPrivatesPtrQTy, 3941 ImplicitParamDecl::Other); 3942 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 3943 ImplicitParamDecl::Other); 3944 Args.push_back(&DstArg); 3945 Args.push_back(&SrcArg); 3946 Args.push_back(&LastprivArg); 3947 const auto &TaskDupFnInfo = 3948 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3949 llvm::FunctionType 
*TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 3950 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 3951 auto *TaskDup = llvm::Function::Create( 3952 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 3953 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 3954 TaskDup->setDoesNotRecurse(); 3955 CodeGenFunction CGF(CGM); 3956 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 3957 Loc); 3958 3959 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3960 CGF.GetAddrOfLocalVar(&DstArg), 3961 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3962 // task_dst->liter = lastpriv; 3963 if (WithLastIter) { 3964 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 3965 LValue Base = CGF.EmitLValueForField( 3966 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3967 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 3968 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 3969 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 3970 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 3971 } 3972 3973 // Emit initial values for private copies (if any). 
3974 assert(!Privates.empty()); 3975 Address KmpTaskSharedsPtr = Address::invalid(); 3976 if (!Data.FirstprivateVars.empty()) { 3977 LValue TDBase = CGF.EmitLoadOfPointerLValue( 3978 CGF.GetAddrOfLocalVar(&SrcArg), 3979 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 3980 LValue Base = CGF.EmitLValueForField( 3981 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 3982 KmpTaskSharedsPtr = Address( 3983 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 3984 Base, *std::next(KmpTaskTQTyRD->field_begin(), 3985 KmpTaskTShareds)), 3986 Loc), 3987 CGM.getNaturalTypeAlignment(SharedsTy)); 3988 } 3989 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 3990 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 3991 CGF.FinishFunction(); 3992 return TaskDup; 3993 } 3994 3995 /// Checks if destructor function is required to be generated. 3996 /// \return true if cleanups are required, false otherwise. 3997 static bool 3998 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, 3999 ArrayRef<PrivateDataTy> Privates) { 4000 for (const PrivateDataTy &P : Privates) { 4001 if (P.second.isLocalPrivate()) 4002 continue; 4003 QualType Ty = P.second.Original->getType().getNonReferenceType(); 4004 if (Ty.isDestructedType()) 4005 return true; 4006 } 4007 return false; 4008 } 4009 4010 namespace { 4011 /// Loop generator for OpenMP iterator expression. 
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  /// Iterator expression being expanded; when null the scope emits nothing.
  const OMPIteratorExpr *E = nullptr;
  /// Per-iterator jump destinations of the loop condition blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  /// Per-iterator jump destinations of the loop exit blocks.
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Opens one loop per iterator of \p E (outermost first). Code emitted while
  /// the scope is alive lands in the innermost loop body; the destructor
  /// closes the loops.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds up front and register private temporaries for
    // every iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    // Emit the loop headers: counter initialization, condition and the update
    // of the iterator variable from the counter.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // The signedness of the comparison follows the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Closes the loops opened by the constructor, innermost first: emits the
  /// counter increment, the back-branch to the condition block and the exit
  /// block of each iterator.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Emits the address and the size in bytes of the storage designated by \p E.
/// - For an array shaping expression the address is the value of the base
///   pointer and the size is the pointee size multiplied by every dimension.
/// - For an array section the size is the distance between the lower bound
///   and the element following the upper bound.
/// - Otherwise the address is the lvalue of \p E and the size is the size of
///   its type.
/// \return Pair of (address, size).
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // size = sizeof(*base) * dim0 * dim1 * ...
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    // size = (char *)(&upper + 1) - (char *)&lower
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    llvm::Value *UpAddr =
        CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds the kmp_task_affinity_info_t record, if it is not built yet.
/// The record has three fields: an intptr-sized base address, a size_t length
/// and a 32-bit unsigned flags field.
/// \param KmpTaskAffinityInfoTy [in,out] Cached record type; it is only
/// assigned when it is null on entry.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

/// Emits allocation and initialization of a task descriptor for directive
/// \p D: gathers the private copies, builds the task record types, emits the
/// proxy task entry, calls the runtime task allocation function, processes
/// 'detach' and 'affinity' clauses, copies the shareds, initializes the
/// privates and stores the destructors/priority data.
/// \return The allocated task, its entry function, the typed task pointer,
/// the lvalue of the embedded kmp_task_t and its record declaration (plus the
/// task duplication function for taskloops that need one).
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
const auto *I = Data.PrivateCopies.begin();
  // Private variables: original reference, original decl and private copy.
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivate variables additionally carry the element-init helper decl.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  // Lastprivate variables.
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Local privates; allocatable declarations are recorded with pointer
  // alignment instead of the declaration's own alignment.
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Order by decreasing alignment; the stable sort keeps the relative order
  // of equally aligned privates.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop directives use a
  // separately cached record type.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Build the task privates mapping function (if there are any privates);
  // otherwise pass a null pointer of the type the task function expects as
  // its fourth parameter.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The 'final' flag is either selected at run time from the clause condition
  // value or folded to a constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target-task allocation entry point is used; it
  // takes the device id as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // NumAffinities counts the items of clauses without an iterator modifier;
    // NumOfElements is the run-time count for clauses with one.
    // NOTE(review): NumOfElements is the product of the iterator upper bounds
    // over all iterator clauses and does not factor in varlist_size() of those
    // clauses, while the fill loop below stores one element per list item per
    // iteration - confirm the array cannot be undersized.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Run-time element count: allocate a variable-length array.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      OpaqueValueExpr OVE(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Compile-time element count: a constant-size temporary is enough.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    // Elements produced inside iterator loops are addressed through a
    // run-time position counter that starts after the statically placed ones.
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx),
                    AffinitiesArray.getAlignment()),
            KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        // ++pos;
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  // View the allocated task as the task-with-privates record; its first field
  // is the embedded kmp_task_t.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops get a task duplication function when there are lastprivates
    // or when any private requires initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
/// 'in' maps to DepIn, 'out' and 'inout' both map to DepInOut and
/// 'mutexinoutset' maps to DepMutexInOutSet; any other kind is unreachable.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
4548 case OMPC_DEPEND_out: 4549 case OMPC_DEPEND_inout: 4550 DepKind = DepInOut; 4551 break; 4552 case OMPC_DEPEND_mutexinoutset: 4553 DepKind = DepMutexInOutSet; 4554 break; 4555 case OMPC_DEPEND_source: 4556 case OMPC_DEPEND_sink: 4557 case OMPC_DEPEND_depobj: 4558 case OMPC_DEPEND_unknown: 4559 llvm_unreachable("Unknown task dependence type"); 4560 } 4561 return DepKind; 4562 } 4563 4564 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 4565 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 4566 QualType &FlagsTy) { 4567 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 4568 if (KmpDependInfoTy.isNull()) { 4569 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 4570 KmpDependInfoRD->startDefinition(); 4571 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 4572 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 4573 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 4574 KmpDependInfoRD->completeDefinition(); 4575 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 4576 } 4577 } 4578 4579 std::pair<llvm::Value *, LValue> 4580 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 4581 SourceLocation Loc) { 4582 ASTContext &C = CGM.getContext(); 4583 QualType FlagsTy; 4584 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4585 RecordDecl *KmpDependInfoRD = 4586 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4587 LValue Base = CGF.EmitLoadOfPointerLValue( 4588 DepobjLVal.getAddress(CGF), 4589 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4590 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4591 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4592 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 4593 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4594 Base.getTBAAInfo()); 4595 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4596 Addr.getPointer(), 4597 
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4598 LValue NumDepsBase = CGF.MakeAddrLValue( 4599 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4600 Base.getBaseInfo(), Base.getTBAAInfo()); 4601 // NumDeps = deps[i].base_addr; 4602 LValue BaseAddrLVal = CGF.EmitLValueForField( 4603 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4604 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 4605 return std::make_pair(NumDeps, Base); 4606 } 4607 4608 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4609 llvm::PointerUnion<unsigned *, LValue *> Pos, 4610 const OMPTaskDataTy::DependData &Data, 4611 Address DependenciesArray) { 4612 CodeGenModule &CGM = CGF.CGM; 4613 ASTContext &C = CGM.getContext(); 4614 QualType FlagsTy; 4615 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4616 RecordDecl *KmpDependInfoRD = 4617 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4618 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 4619 4620 OMPIteratorGeneratorScope IteratorScope( 4621 CGF, cast_or_null<OMPIteratorExpr>( 4622 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 4623 : nullptr)); 4624 for (const Expr *E : Data.DepExprs) { 4625 llvm::Value *Addr; 4626 llvm::Value *Size; 4627 std::tie(Addr, Size) = getPointerAndSize(CGF, E); 4628 LValue Base; 4629 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4630 Base = CGF.MakeAddrLValue( 4631 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 4632 } else { 4633 LValue &PosLVal = *Pos.get<LValue *>(); 4634 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4635 Base = CGF.MakeAddrLValue( 4636 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), 4637 DependenciesArray.getAlignment()), 4638 KmpDependInfoTy); 4639 } 4640 // deps[i].base_addr = &<Dependencies[i].second>; 4641 LValue BaseAddrLVal = CGF.EmitLValueForField( 4642 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4643 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 4644 BaseAddrLVal); 4645 // deps[i].len = sizeof(<Dependencies[i].second>); 4646 LValue LenLVal = CGF.EmitLValueForField( 4647 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 4648 CGF.EmitStoreOfScalar(Size, LenLVal); 4649 // deps[i].flags = <Dependencies[i].first>; 4650 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 4651 LValue FlagsLVal = CGF.EmitLValueForField( 4652 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 4653 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 4654 FlagsLVal); 4655 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 4656 ++(*P); 4657 } else { 4658 LValue &PosLVal = *Pos.get<LValue *>(); 4659 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4660 Idx = CGF.Builder.CreateNUWAdd(Idx, 4661 llvm::ConstantInt::get(Idx->getType(), 1)); 4662 CGF.EmitStoreOfScalar(Idx, PosLVal); 4663 } 4664 } 4665 } 4666 4667 static SmallVector<llvm::Value *, 4> 4668 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4669 const OMPTaskDataTy::DependData 
&Data) { 4670 assert(Data.DepKind == OMPC_DEPEND_depobj && 4671 "Expected depobj dependecy kind."); 4672 SmallVector<llvm::Value *, 4> Sizes; 4673 SmallVector<LValue, 4> SizeLVals; 4674 ASTContext &C = CGF.getContext(); 4675 QualType FlagsTy; 4676 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4677 RecordDecl *KmpDependInfoRD = 4678 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4679 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4680 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4681 { 4682 OMPIteratorGeneratorScope IteratorScope( 4683 CGF, cast_or_null<OMPIteratorExpr>( 4684 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4685 : nullptr)); 4686 for (const Expr *E : Data.DepExprs) { 4687 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4688 LValue Base = CGF.EmitLoadOfPointerLValue( 4689 DepobjLVal.getAddress(CGF), 4690 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4691 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4692 Base.getAddress(CGF), KmpDependInfoPtrT); 4693 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4694 Base.getTBAAInfo()); 4695 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4696 Addr.getPointer(), 4697 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4698 LValue NumDepsBase = CGF.MakeAddrLValue( 4699 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4700 Base.getBaseInfo(), Base.getTBAAInfo()); 4701 // NumDeps = deps[i].base_addr; 4702 LValue BaseAddrLVal = CGF.EmitLValueForField( 4703 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4704 llvm::Value *NumDeps = 4705 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4706 LValue NumLVal = CGF.MakeAddrLValue( 4707 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 4708 C.getUIntPtrType()); 4709 CGF.InitTempAlloca(NumLVal.getAddress(CGF), 4710 llvm::ConstantInt::get(CGF.IntPtrTy, 0)); 4711 llvm::Value *PrevVal = 
CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 4712 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 4713 CGF.EmitStoreOfScalar(Add, NumLVal); 4714 SizeLVals.push_back(NumLVal); 4715 } 4716 } 4717 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 4718 llvm::Value *Size = 4719 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 4720 Sizes.push_back(Size); 4721 } 4722 return Sizes; 4723 } 4724 4725 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 4726 LValue PosLVal, 4727 const OMPTaskDataTy::DependData &Data, 4728 Address DependenciesArray) { 4729 assert(Data.DepKind == OMPC_DEPEND_depobj && 4730 "Expected depobj dependecy kind."); 4731 ASTContext &C = CGF.getContext(); 4732 QualType FlagsTy; 4733 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4734 RecordDecl *KmpDependInfoRD = 4735 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4736 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4737 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 4738 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 4739 { 4740 OMPIteratorGeneratorScope IteratorScope( 4741 CGF, cast_or_null<OMPIteratorExpr>( 4742 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 4743 : nullptr)); 4744 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 4745 const Expr *E = Data.DepExprs[I]; 4746 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 4747 LValue Base = CGF.EmitLoadOfPointerLValue( 4748 DepobjLVal.getAddress(CGF), 4749 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4750 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4751 Base.getAddress(CGF), KmpDependInfoPtrT); 4752 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 4753 Base.getTBAAInfo()); 4754 4755 // Get number of elements in a single depobj. 
4756 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 4757 Addr.getPointer(), 4758 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 4759 LValue NumDepsBase = CGF.MakeAddrLValue( 4760 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 4761 Base.getBaseInfo(), Base.getTBAAInfo()); 4762 // NumDeps = deps[i].base_addr; 4763 LValue BaseAddrLVal = CGF.EmitLValueForField( 4764 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4765 llvm::Value *NumDeps = 4766 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 4767 4768 // memcopy dependency data. 4769 llvm::Value *Size = CGF.Builder.CreateNUWMul( 4770 ElSize, 4771 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 4772 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 4773 Address DepAddr = 4774 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 4775 DependenciesArray.getAlignment()); 4776 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 4777 4778 // Increase pos. 4779 // pos += size; 4780 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 4781 CGF.EmitStoreOfScalar(Add, PosLVal); 4782 } 4783 } 4784 } 4785 4786 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 4787 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 4788 SourceLocation Loc) { 4789 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 4790 return D.DepExprs.empty(); 4791 })) 4792 return std::make_pair(nullptr, Address::invalid()); 4793 // Process list of dependencies. 4794 ASTContext &C = CGM.getContext(); 4795 Address DependenciesArray = Address::invalid(); 4796 llvm::Value *NumOfElements = nullptr; 4797 unsigned NumDependencies = std::accumulate( 4798 Dependencies.begin(), Dependencies.end(), 0, 4799 [](unsigned V, const OMPTaskDataTy::DependData &D) { 4800 return D.DepKind == OMPC_DEPEND_depobj 4801 ? V 4802 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 4803 }); 4804 QualType FlagsTy; 4805 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4806 bool HasDepobjDeps = false; 4807 bool HasRegularWithIterators = false; 4808 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 4809 llvm::Value *NumOfRegularWithIterators = 4810 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 4811 // Calculate number of depobj dependecies and regular deps with the iterators. 4812 for (const OMPTaskDataTy::DependData &D : Dependencies) { 4813 if (D.DepKind == OMPC_DEPEND_depobj) { 4814 SmallVector<llvm::Value *, 4> Sizes = 4815 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 4816 for (llvm::Value *Size : Sizes) { 4817 NumOfDepobjElements = 4818 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 4819 } 4820 HasDepobjDeps = true; 4821 continue; 4822 } 4823 // Include number of iterations, if any. 4824 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 4825 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4826 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4827 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 4828 NumOfRegularWithIterators = 4829 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 4830 } 4831 HasRegularWithIterators = true; 4832 continue; 4833 } 4834 } 4835 4836 QualType KmpDependInfoArrayTy; 4837 if (HasDepobjDeps || HasRegularWithIterators) { 4838 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 4839 /*isSigned=*/false); 4840 if (HasDepobjDeps) { 4841 NumOfElements = 4842 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 4843 } 4844 if (HasRegularWithIterators) { 4845 NumOfElements = 4846 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 4847 } 4848 OpaqueValueExpr OVE(Loc, 4849 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 4850 VK_RValue); 4851 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 4852 RValue::get(NumOfElements)); 4853 
KmpDependInfoArrayTy = 4854 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 4855 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 4856 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 4857 // Properly emit variable-sized array. 4858 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 4859 ImplicitParamDecl::Other); 4860 CGF.EmitVarDecl(*PD); 4861 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 4862 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 4863 /*isSigned=*/false); 4864 } else { 4865 KmpDependInfoArrayTy = C.getConstantArrayType( 4866 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 4867 ArrayType::Normal, /*IndexTypeQuals=*/0); 4868 DependenciesArray = 4869 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 4870 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 4871 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 4872 /*isSigned=*/false); 4873 } 4874 unsigned Pos = 0; 4875 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4876 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4877 Dependencies[I].IteratorExpr) 4878 continue; 4879 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 4880 DependenciesArray); 4881 } 4882 // Copy regular dependecies with iterators. 4883 LValue PosLVal = CGF.MakeAddrLValue( 4884 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 4885 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 4886 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4887 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 4888 !Dependencies[I].IteratorExpr) 4889 continue; 4890 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 4891 DependenciesArray); 4892 } 4893 // Copy final depobj arrays without iterators. 
4894 if (HasDepobjDeps) { 4895 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 4896 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 4897 continue; 4898 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 4899 DependenciesArray); 4900 } 4901 } 4902 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4903 DependenciesArray, CGF.VoidPtrTy); 4904 return std::make_pair(NumOfElements, DependenciesArray); 4905 } 4906 4907 Address CGOpenMPRuntime::emitDepobjDependClause( 4908 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 4909 SourceLocation Loc) { 4910 if (Dependencies.DepExprs.empty()) 4911 return Address::invalid(); 4912 // Process list of dependencies. 4913 ASTContext &C = CGM.getContext(); 4914 Address DependenciesArray = Address::invalid(); 4915 unsigned NumDependencies = Dependencies.DepExprs.size(); 4916 QualType FlagsTy; 4917 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4918 RecordDecl *KmpDependInfoRD = 4919 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 4920 4921 llvm::Value *Size; 4922 // Define type kmp_depend_info[<Dependencies.size()>]; 4923 // For depobj reserve one extra element to store the number of elements. 4924 // It is required to handle depobj(x) update(in) construct. 
4925 // kmp_depend_info[<Dependencies.size()>] deps; 4926 llvm::Value *NumDepsVal; 4927 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 4928 if (const auto *IE = 4929 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 4930 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 4931 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 4932 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 4933 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 4934 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 4935 } 4936 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 4937 NumDepsVal); 4938 CharUnits SizeInBytes = 4939 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 4940 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 4941 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 4942 NumDepsVal = 4943 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 4944 } else { 4945 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 4946 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 4947 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 4948 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 4949 Size = CGM.getSize(Sz.alignTo(Align)); 4950 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 4951 } 4952 // Need to allocate on the dynamic memory. 4953 llvm::Value *ThreadID = getThreadID(CGF, Loc); 4954 // Use default allocator. 
4955 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4956 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 4957 4958 llvm::Value *Addr = 4959 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 4960 CGM.getModule(), OMPRTL___kmpc_alloc), 4961 Args, ".dep.arr.addr"); 4962 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4963 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 4964 DependenciesArray = Address(Addr, Align); 4965 // Write number of elements in the first element of array for depobj. 4966 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 4967 // deps[i].base_addr = NumDependencies; 4968 LValue BaseAddrLVal = CGF.EmitLValueForField( 4969 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 4970 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 4971 llvm::PointerUnion<unsigned *, LValue *> Pos; 4972 unsigned Idx = 1; 4973 LValue PosLVal; 4974 if (Dependencies.IteratorExpr) { 4975 PosLVal = CGF.MakeAddrLValue( 4976 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 4977 C.getSizeType()); 4978 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 4979 /*IsInit=*/true); 4980 Pos = &PosLVal; 4981 } else { 4982 Pos = &Idx; 4983 } 4984 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 4985 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4986 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 4987 return DependenciesArray; 4988 } 4989 4990 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 4991 SourceLocation Loc) { 4992 ASTContext &C = CGM.getContext(); 4993 QualType FlagsTy; 4994 getDependTypes(C, KmpDependInfoTy, FlagsTy); 4995 LValue Base = CGF.EmitLoadOfPointerLValue( 4996 DepobjLVal.getAddress(CGF), 4997 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 4998 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 4999 Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5000 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5001 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5002 Addr.getPointer(), 5003 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5004 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5005 CGF.VoidPtrTy); 5006 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5007 // Use default allocator. 5008 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5009 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5010 5011 // _kmpc_free(gtid, addr, nullptr); 5012 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5013 CGM.getModule(), OMPRTL___kmpc_free), 5014 Args); 5015 } 5016 5017 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5018 OpenMPDependClauseKind NewDepKind, 5019 SourceLocation Loc) { 5020 ASTContext &C = CGM.getContext(); 5021 QualType FlagsTy; 5022 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5023 RecordDecl *KmpDependInfoRD = 5024 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5025 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5026 llvm::Value *NumDeps; 5027 LValue Base; 5028 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5029 5030 Address Begin = Base.getAddress(CGF); 5031 // Cast from pointer to array type to pointer to single element. 5032 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5033 // The basic structure here is a while-do loop. 
5034 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5035 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5036 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5037 CGF.EmitBlock(BodyBB); 5038 llvm::PHINode *ElementPHI = 5039 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5040 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5041 Begin = Address(ElementPHI, Begin.getAlignment()); 5042 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5043 Base.getTBAAInfo()); 5044 // deps[i].flags = NewDepKind; 5045 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5046 LValue FlagsLVal = CGF.EmitLValueForField( 5047 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5048 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5049 FlagsLVal); 5050 5051 // Shift the address forward by one element. 5052 Address ElementNext = 5053 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5054 ElementPHI->addIncoming(ElementNext.getPointer(), 5055 CGF.Builder.GetInsertBlock()); 5056 llvm::Value *IsEmpty = 5057 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5058 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5059 // Done. 
5060 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5061 } 5062 5063 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5064 const OMPExecutableDirective &D, 5065 llvm::Function *TaskFunction, 5066 QualType SharedsTy, Address Shareds, 5067 const Expr *IfCond, 5068 const OMPTaskDataTy &Data) { 5069 if (!CGF.HaveInsertPoint()) 5070 return; 5071 5072 TaskResultTy Result = 5073 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5074 llvm::Value *NewTask = Result.NewTask; 5075 llvm::Function *TaskEntry = Result.TaskEntry; 5076 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5077 LValue TDBase = Result.TDBase; 5078 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5079 // Process list of dependences. 5080 Address DependenciesArray = Address::invalid(); 5081 llvm::Value *NumOfElements; 5082 std::tie(NumOfElements, DependenciesArray) = 5083 emitDependClause(CGF, Data.Dependences, Loc); 5084 5085 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5086 // libcall. 
5087 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5088 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5089 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5090 // list is not empty 5091 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5092 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5093 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5094 llvm::Value *DepTaskArgs[7]; 5095 if (!Data.Dependences.empty()) { 5096 DepTaskArgs[0] = UpLoc; 5097 DepTaskArgs[1] = ThreadID; 5098 DepTaskArgs[2] = NewTask; 5099 DepTaskArgs[3] = NumOfElements; 5100 DepTaskArgs[4] = DependenciesArray.getPointer(); 5101 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5102 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5103 } 5104 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5105 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5106 if (!Data.Tied) { 5107 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5108 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5109 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5110 } 5111 if (!Data.Dependences.empty()) { 5112 CGF.EmitRuntimeCall( 5113 OMPBuilder.getOrCreateRuntimeFunction( 5114 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), 5115 DepTaskArgs); 5116 } else { 5117 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5118 CGM.getModule(), OMPRTL___kmpc_omp_task), 5119 TaskArgs); 5120 } 5121 // Check if parent region is untied and build return for untied task; 5122 if (auto *Region = 5123 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5124 Region->emitUntiedSwitch(CGF); 5125 }; 5126 5127 llvm::Value *DepWaitTaskArgs[6]; 5128 if (!Data.Dependences.empty()) { 5129 DepWaitTaskArgs[0] = UpLoc; 5130 DepWaitTaskArgs[1] = ThreadID; 5131 DepWaitTaskArgs[2] = NumOfElements; 5132 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5133 DepWaitTaskArgs[4] 
= CGF.Builder.getInt32(0); 5134 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5135 } 5136 auto &M = CGM.getModule(); 5137 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, 5138 TaskEntry, &Data, &DepWaitTaskArgs, 5139 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5140 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5141 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5142 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5143 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5144 // is specified. 5145 if (!Data.Dependences.empty()) 5146 CGF.EmitRuntimeCall( 5147 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), 5148 DepWaitTaskArgs); 5149 // Call proxy_task_entry(gtid, new_task); 5150 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5151 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5152 Action.Enter(CGF); 5153 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5154 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5155 OutlinedFnArgs); 5156 }; 5157 5158 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5159 // kmp_task_t *new_task); 5160 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5161 // kmp_task_t *new_task); 5162 RegionCodeGenTy RCG(CodeGen); 5163 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( 5164 M, OMPRTL___kmpc_omp_task_begin_if0), 5165 TaskArgs, 5166 OMPBuilder.getOrCreateRuntimeFunction( 5167 M, OMPRTL___kmpc_omp_task_complete_if0), 5168 TaskArgs); 5169 RCG.setAction(Action); 5170 RCG(CGF); 5171 }; 5172 5173 if (IfCond) { 5174 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5175 } else { 5176 RegionCodeGenTy ThenRCG(ThenCodeGen); 5177 ThenRCG(CGF); 5178 } 5179 } 5180 5181 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5182 const OMPLoopDirective &D, 5183 llvm::Function *TaskFunction, 5184 
QualType SharedsTy, Address Shareds, 5185 const Expr *IfCond, 5186 const OMPTaskDataTy &Data) { 5187 if (!CGF.HaveInsertPoint()) 5188 return; 5189 TaskResultTy Result = 5190 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5191 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5192 // libcall. 5193 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5194 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5195 // sched, kmp_uint64 grainsize, void *task_dup); 5196 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5197 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5198 llvm::Value *IfVal; 5199 if (IfCond) { 5200 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5201 /*isSigned=*/true); 5202 } else { 5203 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5204 } 5205 5206 LValue LBLVal = CGF.EmitLValueForField( 5207 Result.TDBase, 5208 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5209 const auto *LBVar = 5210 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5211 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5212 LBLVal.getQuals(), 5213 /*IsInitializer=*/true); 5214 LValue UBLVal = CGF.EmitLValueForField( 5215 Result.TDBase, 5216 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5217 const auto *UBVar = 5218 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5219 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5220 UBLVal.getQuals(), 5221 /*IsInitializer=*/true); 5222 LValue StLVal = CGF.EmitLValueForField( 5223 Result.TDBase, 5224 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5225 const auto *StVar = 5226 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5227 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5228 StLVal.getQuals(), 5229 /*IsInitializer=*/true); 5230 // 
Store reductions address. 5231 LValue RedLVal = CGF.EmitLValueForField( 5232 Result.TDBase, 5233 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5234 if (Data.Reductions) { 5235 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5236 } else { 5237 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5238 CGF.getContext().VoidPtrTy); 5239 } 5240 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5241 llvm::Value *TaskArgs[] = { 5242 UpLoc, 5243 ThreadID, 5244 Result.NewTask, 5245 IfVal, 5246 LBLVal.getPointer(CGF), 5247 UBLVal.getPointer(CGF), 5248 CGF.EmitLoadOfScalar(StLVal, Loc), 5249 llvm::ConstantInt::getSigned( 5250 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5251 llvm::ConstantInt::getSigned( 5252 CGF.IntTy, Data.Schedule.getPointer() 5253 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5254 : NoSchedule), 5255 Data.Schedule.getPointer() 5256 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5257 /*isSigned=*/false) 5258 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5259 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5260 Result.TaskDupFn, CGF.VoidPtrTy) 5261 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5262 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 5263 CGM.getModule(), OMPRTL___kmpc_taskloop), 5264 TaskArgs); 5265 } 5266 5267 /// Emit reduction operation for each element of array (required for 5268 /// array sections) LHS op = RHS. 5269 /// \param Type Type of array. 5270 /// \param LHSVar Variable on the left side of the reduction operation 5271 /// (references element of array in original variable). 5272 /// \param RHSVar Variable on the right side of the reduction operation 5273 /// (references element of array in original variable). 5274 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5275 /// RHSVar. 
5276 static void EmitOMPAggregateReduction( 5277 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5278 const VarDecl *RHSVar, 5279 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5280 const Expr *, const Expr *)> &RedOpGen, 5281 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5282 const Expr *UpExpr = nullptr) { 5283 // Perform element-by-element initialization. 5284 QualType ElementTy; 5285 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5286 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5287 5288 // Drill down to the base element type on both arrays. 5289 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5290 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5291 5292 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5293 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5294 // Cast from pointer to array type to pointer to single element. 5295 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5296 // The basic structure here is a while-do loop. 5297 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5298 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5299 llvm::Value *IsEmpty = 5300 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5301 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5302 5303 // Enter the loop body, making that address the current address. 
5304 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5305 CGF.EmitBlock(BodyBB); 5306 5307 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5308 5309 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5310 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5311 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5312 Address RHSElementCurrent = 5313 Address(RHSElementPHI, 5314 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5315 5316 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5317 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5318 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5319 Address LHSElementCurrent = 5320 Address(LHSElementPHI, 5321 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5322 5323 // Emit copy. 5324 CodeGenFunction::OMPPrivateScope Scope(CGF); 5325 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5326 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5327 Scope.Privatize(); 5328 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5329 Scope.ForceCleanup(); 5330 5331 // Shift the address forward by one element. 5332 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5333 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5334 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5335 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5336 // Check whether we've reached the end. 5337 llvm::Value *Done = 5338 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5339 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5340 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5341 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5342 5343 // Done. 5344 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5345 } 5346 5347 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5348 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5349 /// UDR combiner function. 5350 static void emitReductionCombiner(CodeGenFunction &CGF, 5351 const Expr *ReductionOp) { 5352 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5353 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5354 if (const auto *DRE = 5355 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5356 if (const auto *DRD = 5357 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5358 std::pair<llvm::Function *, llvm::Function *> Reduction = 5359 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5360 RValue Func = RValue::get(Reduction.first); 5361 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5362 CGF.EmitIgnoredExpr(ReductionOp); 5363 return; 5364 } 5365 CGF.EmitIgnoredExpr(ReductionOp); 5366 } 5367 5368 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5369 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5370 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5371 ArrayRef<const Expr *> ReductionOps) { 5372 ASTContext &C = CGM.getContext(); 5373 5374 // void reduction_func(void *LHSArg, void *RHSArg); 5375 FunctionArgList Args; 5376 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5377 ImplicitParamDecl::Other); 5378 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5379 ImplicitParamDecl::Other); 5380 Args.push_back(&LHSArg); 5381 Args.push_back(&RHSArg); 5382 const auto &CGFI = 5383 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5384 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5385 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5386 llvm::GlobalValue::InternalLinkage, Name, 5387 &CGM.getModule()); 5388 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5389 Fn->setDoesNotRecurse(); 
5390 CodeGenFunction CGF(CGM); 5391 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5392 5393 // Dst = (void*[n])(LHSArg); 5394 // Src = (void*[n])(RHSArg); 5395 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5396 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5397 ArgsType), CGF.getPointerAlign()); 5398 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5399 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5400 ArgsType), CGF.getPointerAlign()); 5401 5402 // ... 5403 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5404 // ... 5405 CodeGenFunction::OMPPrivateScope Scope(CGF); 5406 auto IPriv = Privates.begin(); 5407 unsigned Idx = 0; 5408 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5409 const auto *RHSVar = 5410 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5411 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5412 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5413 }); 5414 const auto *LHSVar = 5415 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5416 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5417 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5418 }); 5419 QualType PrivTy = (*IPriv)->getType(); 5420 if (PrivTy->isVariablyModifiedType()) { 5421 // Get array size and emit VLA type. 
5422 ++Idx; 5423 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5424 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5425 const VariableArrayType *VLA = 5426 CGF.getContext().getAsVariableArrayType(PrivTy); 5427 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5428 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5429 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5430 CGF.EmitVariablyModifiedType(PrivTy); 5431 } 5432 } 5433 Scope.Privatize(); 5434 IPriv = Privates.begin(); 5435 auto ILHS = LHSExprs.begin(); 5436 auto IRHS = RHSExprs.begin(); 5437 for (const Expr *E : ReductionOps) { 5438 if ((*IPriv)->getType()->isArrayType()) { 5439 // Emit reduction for array section. 5440 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5441 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5442 EmitOMPAggregateReduction( 5443 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5444 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5445 emitReductionCombiner(CGF, E); 5446 }); 5447 } else { 5448 // Emit reduction for array subscript or single variable. 5449 emitReductionCombiner(CGF, E); 5450 } 5451 ++IPriv; 5452 ++ILHS; 5453 ++IRHS; 5454 } 5455 Scope.ForceCleanup(); 5456 CGF.FinishFunction(); 5457 return Fn; 5458 } 5459 5460 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5461 const Expr *ReductionOp, 5462 const Expr *PrivateRef, 5463 const DeclRefExpr *LHS, 5464 const DeclRefExpr *RHS) { 5465 if (PrivateRef->getType()->isArrayType()) { 5466 // Emit reduction for array section. 
5467 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5468 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5469 EmitOMPAggregateReduction( 5470 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5471 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5472 emitReductionCombiner(CGF, ReductionOp); 5473 }); 5474 } else { 5475 // Emit reduction for array subscript or single variable. 5476 emitReductionCombiner(CGF, ReductionOp); 5477 } 5478 } 5479 5480 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5481 ArrayRef<const Expr *> Privates, 5482 ArrayRef<const Expr *> LHSExprs, 5483 ArrayRef<const Expr *> RHSExprs, 5484 ArrayRef<const Expr *> ReductionOps, 5485 ReductionOptionsTy Options) { 5486 if (!CGF.HaveInsertPoint()) 5487 return; 5488 5489 bool WithNowait = Options.WithNowait; 5490 bool SimpleReduction = Options.SimpleReduction; 5491 5492 // Next code should be emitted for reduction: 5493 // 5494 // static kmp_critical_name lock = { 0 }; 5495 // 5496 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5497 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5498 // ... 5499 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5500 // *(Type<n>-1*)rhs[<n>-1]); 5501 // } 5502 // 5503 // ... 5504 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5505 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5506 // RedList, reduce_func, &<lock>)) { 5507 // case 1: 5508 // ... 5509 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5510 // ... 5511 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5512 // break; 5513 // case 2: 5514 // ... 5515 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5516 // ... 5517 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5518 // break; 5519 // default:; 5520 // } 5521 // 5522 // if SimpleReduction is true, only the next code is generated: 5523 // ... 
5524 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5525 // ... 5526 5527 ASTContext &C = CGM.getContext(); 5528 5529 if (SimpleReduction) { 5530 CodeGenFunction::RunCleanupsScope Scope(CGF); 5531 auto IPriv = Privates.begin(); 5532 auto ILHS = LHSExprs.begin(); 5533 auto IRHS = RHSExprs.begin(); 5534 for (const Expr *E : ReductionOps) { 5535 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5536 cast<DeclRefExpr>(*IRHS)); 5537 ++IPriv; 5538 ++ILHS; 5539 ++IRHS; 5540 } 5541 return; 5542 } 5543 5544 // 1. Build a list of reduction variables. 5545 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5546 auto Size = RHSExprs.size(); 5547 for (const Expr *E : Privates) { 5548 if (E->getType()->isVariablyModifiedType()) 5549 // Reserve place for array size. 5550 ++Size; 5551 } 5552 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5553 QualType ReductionArrayTy = 5554 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 5555 /*IndexTypeQuals=*/0); 5556 Address ReductionList = 5557 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5558 auto IPriv = Privates.begin(); 5559 unsigned Idx = 0; 5560 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5561 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5562 CGF.Builder.CreateStore( 5563 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5564 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 5565 Elem); 5566 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5567 // Store array size. 5568 ++Idx; 5569 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5570 llvm::Value *Size = CGF.Builder.CreateIntCast( 5571 CGF.getVLASize( 5572 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5573 .NumElts, 5574 CGF.SizeTy, /*isSigned=*/false); 5575 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5576 Elem); 5577 } 5578 } 5579 5580 // 2. 
Emit reduce_func(). 5581 llvm::Function *ReductionFn = emitReductionFunction( 5582 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5583 LHSExprs, RHSExprs, ReductionOps); 5584 5585 // 3. Create static kmp_critical_name lock = { 0 }; 5586 std::string Name = getName({"reduction"}); 5587 llvm::Value *Lock = getCriticalRegionLock(Name); 5588 5589 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5590 // RedList, reduce_func, &<lock>); 5591 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5592 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5593 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5594 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5595 ReductionList.getPointer(), CGF.VoidPtrTy); 5596 llvm::Value *Args[] = { 5597 IdentTLoc, // ident_t *<loc> 5598 ThreadId, // i32 <gtid> 5599 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5600 ReductionArrayTySize, // size_type sizeof(RedList) 5601 RL, // void *RedList 5602 ReductionFn, // void (*) (void *, void *) <reduce_func> 5603 Lock // kmp_critical_name *&<lock> 5604 }; 5605 llvm::Value *Res = CGF.EmitRuntimeCall( 5606 OMPBuilder.getOrCreateRuntimeFunction( 5607 CGM.getModule(), 5608 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), 5609 Args); 5610 5611 // 5. Build switch(res) 5612 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5613 llvm::SwitchInst *SwInst = 5614 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5615 5616 // 6. Build case 1: 5617 // ... 5618 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5619 // ... 
5620 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5621 // break; 5622 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5623 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5624 CGF.EmitBlock(Case1BB); 5625 5626 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5627 llvm::Value *EndArgs[] = { 5628 IdentTLoc, // ident_t *<loc> 5629 ThreadId, // i32 <gtid> 5630 Lock // kmp_critical_name *&<lock> 5631 }; 5632 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5633 CodeGenFunction &CGF, PrePostActionTy &Action) { 5634 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5635 auto IPriv = Privates.begin(); 5636 auto ILHS = LHSExprs.begin(); 5637 auto IRHS = RHSExprs.begin(); 5638 for (const Expr *E : ReductionOps) { 5639 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5640 cast<DeclRefExpr>(*IRHS)); 5641 ++IPriv; 5642 ++ILHS; 5643 ++IRHS; 5644 } 5645 }; 5646 RegionCodeGenTy RCG(CodeGen); 5647 CommonActionTy Action( 5648 nullptr, llvm::None, 5649 OMPBuilder.getOrCreateRuntimeFunction( 5650 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait 5651 : OMPRTL___kmpc_end_reduce), 5652 EndArgs); 5653 RCG.setAction(Action); 5654 RCG(CGF); 5655 5656 CGF.EmitBranch(DefaultBB); 5657 5658 // 7. Build case 2: 5659 // ... 5660 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5661 // ... 
5662 // break; 5663 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5664 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5665 CGF.EmitBlock(Case2BB); 5666 5667 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5668 CodeGenFunction &CGF, PrePostActionTy &Action) { 5669 auto ILHS = LHSExprs.begin(); 5670 auto IRHS = RHSExprs.begin(); 5671 auto IPriv = Privates.begin(); 5672 for (const Expr *E : ReductionOps) { 5673 const Expr *XExpr = nullptr; 5674 const Expr *EExpr = nullptr; 5675 const Expr *UpExpr = nullptr; 5676 BinaryOperatorKind BO = BO_Comma; 5677 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5678 if (BO->getOpcode() == BO_Assign) { 5679 XExpr = BO->getLHS(); 5680 UpExpr = BO->getRHS(); 5681 } 5682 } 5683 // Try to emit update expression as a simple atomic. 5684 const Expr *RHSExpr = UpExpr; 5685 if (RHSExpr) { 5686 // Analyze RHS part of the whole expression. 5687 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5688 RHSExpr->IgnoreParenImpCasts())) { 5689 // If this is a conditional operator, analyze its condition for 5690 // min/max reduction operator. 
5691 RHSExpr = ACO->getCond(); 5692 } 5693 if (const auto *BORHS = 5694 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5695 EExpr = BORHS->getRHS(); 5696 BO = BORHS->getOpcode(); 5697 } 5698 } 5699 if (XExpr) { 5700 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5701 auto &&AtomicRedGen = [BO, VD, 5702 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5703 const Expr *EExpr, const Expr *UpExpr) { 5704 LValue X = CGF.EmitLValue(XExpr); 5705 RValue E; 5706 if (EExpr) 5707 E = CGF.EmitAnyExpr(EExpr); 5708 CGF.EmitOMPAtomicSimpleUpdateExpr( 5709 X, E, BO, /*IsXLHSInRHSPart=*/true, 5710 llvm::AtomicOrdering::Monotonic, Loc, 5711 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5712 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5713 PrivateScope.addPrivate( 5714 VD, [&CGF, VD, XRValue, Loc]() { 5715 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5716 CGF.emitOMPSimpleStore( 5717 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5718 VD->getType().getNonReferenceType(), Loc); 5719 return LHSTemp; 5720 }); 5721 (void)PrivateScope.Privatize(); 5722 return CGF.EmitAnyExpr(UpExpr); 5723 }); 5724 }; 5725 if ((*IPriv)->getType()->isArrayType()) { 5726 // Emit atomic reduction for array section. 5727 const auto *RHSVar = 5728 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5729 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5730 AtomicRedGen, XExpr, EExpr, UpExpr); 5731 } else { 5732 // Emit atomic reduction for array subscript or single variable. 5733 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5734 } 5735 } else { 5736 // Emit as a critical region. 
5737 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5738 const Expr *, const Expr *) { 5739 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5740 std::string Name = RT.getName({"atomic_reduction"}); 5741 RT.emitCriticalRegion( 5742 CGF, Name, 5743 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5744 Action.Enter(CGF); 5745 emitReductionCombiner(CGF, E); 5746 }, 5747 Loc); 5748 }; 5749 if ((*IPriv)->getType()->isArrayType()) { 5750 const auto *LHSVar = 5751 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5752 const auto *RHSVar = 5753 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5754 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5755 CritRedGen); 5756 } else { 5757 CritRedGen(CGF, nullptr, nullptr, nullptr); 5758 } 5759 } 5760 ++ILHS; 5761 ++IRHS; 5762 ++IPriv; 5763 } 5764 }; 5765 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 5766 if (!WithNowait) { 5767 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 5768 llvm::Value *EndArgs[] = { 5769 IdentTLoc, // ident_t *<loc> 5770 ThreadId, // i32 <gtid> 5771 Lock // kmp_critical_name *&<lock> 5772 }; 5773 CommonActionTy Action(nullptr, llvm::None, 5774 OMPBuilder.getOrCreateRuntimeFunction( 5775 CGM.getModule(), OMPRTL___kmpc_end_reduce), 5776 EndArgs); 5777 AtomicRCG.setAction(Action); 5778 AtomicRCG(CGF); 5779 } else { 5780 AtomicRCG(CGF); 5781 } 5782 5783 CGF.EmitBranch(DefaultBB); 5784 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 5785 } 5786 5787 /// Generates unique name for artificial threadprivate variables. 5788 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5789 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5790 const Expr *Ref) { 5791 SmallString<256> Buffer; 5792 llvm::raw_svector_ostream Out(Buffer); 5793 const clang::DeclRefExpr *DE; 5794 const VarDecl *D = ::getBaseDecl(Ref, DE); 5795 if (!D) 5796 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5797 D = D->getCanonicalDecl(); 5798 std::string Name = CGM.getOpenMPRuntime().getName( 5799 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5800 Out << Prefix << Name << "_" 5801 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5802 return std::string(Out.str()); 5803 } 5804 5805 /// Emits reduction initializer function: 5806 /// \code 5807 /// void @.red_init(void* %arg, void* %orig) { 5808 /// %0 = bitcast void* %arg to <type>* 5809 /// store <type> <init>, <type>* %0 5810 /// ret void 5811 /// } 5812 /// \endcode 5813 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 5814 SourceLocation Loc, 5815 ReductionCodeGen &RCG, unsigned N) { 5816 ASTContext &C = CGM.getContext(); 5817 QualType VoidPtrTy = C.VoidPtrTy; 5818 VoidPtrTy.addRestrict(); 5819 FunctionArgList Args; 5820 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5821 ImplicitParamDecl::Other); 5822 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 5823 ImplicitParamDecl::Other); 5824 Args.emplace_back(&Param); 5825 Args.emplace_back(&ParamOrig); 5826 const auto &FnInfo = 5827 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5828 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5829 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 5830 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5831 Name, &CGM.getModule()); 5832 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5833 Fn->setDoesNotRecurse(); 5834 CodeGenFunction CGF(CGM); 5835 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5836 Address PrivateAddr = CGF.EmitLoadOfPointer( 5837 CGF.GetAddrOfLocalVar(&Param), 5838 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5839 llvm::Value *Size = nullptr; 5840 // If the size of the reduction item is non-constant, load it from global 5841 // threadprivate variable. 5842 if (RCG.getSizes(N).second) { 5843 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5844 CGF, CGM.getContext().getSizeType(), 5845 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5846 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5847 CGM.getContext().getSizeType(), Loc); 5848 } 5849 RCG.emitAggregateType(CGF, N, Size); 5850 LValue OrigLVal; 5851 // If initializer uses initializer from declare reduction construct, emit a 5852 // pointer to the address of the original reduction item (reuired by reduction 5853 // initializer) 5854 if (RCG.usesReductionInitializer(N)) { 5855 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 5856 SharedAddr = CGF.EmitLoadOfPointer( 5857 SharedAddr, 5858 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 5859 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 5860 } else { 5861 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 5862 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 5863 CGM.getContext().VoidPtrTy); 5864 } 5865 // Emit the initializer: 5866 // %0 = bitcast void* %arg to <type>* 5867 // store <type> <init>, <type>* %0 5868 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 5869 [](CodeGenFunction &) { return false; }); 5870 CGF.FinishFunction(); 5871 return Fn; 5872 } 5873 5874 /// Emits reduction combiner function: 5875 /// \code 5876 /// void @.red_comb(void* %arg0, void* %arg1) { 5877 /// %lhs = bitcast void* %arg0 to <type>* 5878 /// %rhs = bitcast void* %arg1 to <type>* 5879 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 5880 /// store <type> %2, <type>* %lhs 5881 /// 
ret void 5882 /// } 5883 /// \endcode 5884 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 5885 SourceLocation Loc, 5886 ReductionCodeGen &RCG, unsigned N, 5887 const Expr *ReductionOp, 5888 const Expr *LHS, const Expr *RHS, 5889 const Expr *PrivateRef) { 5890 ASTContext &C = CGM.getContext(); 5891 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 5892 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 5893 FunctionArgList Args; 5894 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 5895 C.VoidPtrTy, ImplicitParamDecl::Other); 5896 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5897 ImplicitParamDecl::Other); 5898 Args.emplace_back(&ParamInOut); 5899 Args.emplace_back(&ParamIn); 5900 const auto &FnInfo = 5901 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5902 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 5903 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 5904 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5905 Name, &CGM.getModule()); 5906 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5907 Fn->setDoesNotRecurse(); 5908 CodeGenFunction CGF(CGM); 5909 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5910 llvm::Value *Size = nullptr; 5911 // If the size of the reduction item is non-constant, load it from global 5912 // threadprivate variable. 5913 if (RCG.getSizes(N).second) { 5914 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5915 CGF, CGM.getContext().getSizeType(), 5916 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5917 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5918 CGM.getContext().getSizeType(), Loc); 5919 } 5920 RCG.emitAggregateType(CGF, N, Size); 5921 // Remap lhs and rhs variables to the addresses of the function arguments. 
5922 // %lhs = bitcast void* %arg0 to <type>* 5923 // %rhs = bitcast void* %arg1 to <type>* 5924 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5925 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 5926 // Pull out the pointer to the variable. 5927 Address PtrAddr = CGF.EmitLoadOfPointer( 5928 CGF.GetAddrOfLocalVar(&ParamInOut), 5929 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5930 return CGF.Builder.CreateElementBitCast( 5931 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 5932 }); 5933 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 5934 // Pull out the pointer to the variable. 5935 Address PtrAddr = CGF.EmitLoadOfPointer( 5936 CGF.GetAddrOfLocalVar(&ParamIn), 5937 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5938 return CGF.Builder.CreateElementBitCast( 5939 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 5940 }); 5941 PrivateScope.Privatize(); 5942 // Emit the combiner body: 5943 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 5944 // store <type> %2, <type>* %lhs 5945 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 5946 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 5947 cast<DeclRefExpr>(RHS)); 5948 CGF.FinishFunction(); 5949 return Fn; 5950 } 5951 5952 /// Emits reduction finalizer function: 5953 /// \code 5954 /// void @.red_fini(void* %arg) { 5955 /// %0 = bitcast void* %arg to <type>* 5956 /// <destroy>(<type>* %0) 5957 /// ret void 5958 /// } 5959 /// \endcode 5960 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 5961 SourceLocation Loc, 5962 ReductionCodeGen &RCG, unsigned N) { 5963 if (!RCG.needCleanups(N)) 5964 return nullptr; 5965 ASTContext &C = CGM.getContext(); 5966 FunctionArgList Args; 5967 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5968 ImplicitParamDecl::Other); 5969 Args.emplace_back(&Param); 5970 const auto &FnInfo = 5971 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5972 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 5973 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 5974 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 5975 Name, &CGM.getModule()); 5976 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 5977 Fn->setDoesNotRecurse(); 5978 CodeGenFunction CGF(CGM); 5979 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 5980 Address PrivateAddr = CGF.EmitLoadOfPointer( 5981 CGF.GetAddrOfLocalVar(&Param), 5982 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5983 llvm::Value *Size = nullptr; 5984 // If the size of the reduction item is non-constant, load it from global 5985 // threadprivate variable. 5986 if (RCG.getSizes(N).second) { 5987 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 5988 CGF, CGM.getContext().getSizeType(), 5989 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 5990 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 5991 CGM.getContext().getSizeType(), Loc); 5992 } 5993 RCG.emitAggregateType(CGF, N, Size); 5994 // Emit the finalizer body: 5995 // <destroy>(<type>* %0) 5996 RCG.emitCleanups(CGF, N, PrivateAddr); 5997 CGF.FinishFunction(Loc); 5998 return Fn; 5999 } 6000 6001 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6002 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6003 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6004 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6005 return nullptr; 6006 6007 // Build typedef struct: 6008 // kmp_taskred_input { 6009 // void *reduce_shar; // shared reduction item 6010 // void *reduce_orig; // original reduction item used for initialization 6011 // size_t reduce_size; // size of data item 6012 // void *reduce_init; // data initialization routine 6013 // void *reduce_fini; // data finalization routine 6014 // void *reduce_comb; // data combiner routine 6015 // kmp_task_red_flags_t flags; // 
flags for additional info from compiler 6016 // } kmp_taskred_input_t; 6017 ASTContext &C = CGM.getContext(); 6018 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6019 RD->startDefinition(); 6020 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6021 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6022 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6023 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6024 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6025 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6026 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6027 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6028 RD->completeDefinition(); 6029 QualType RDType = C.getRecordType(RD); 6030 unsigned Size = Data.ReductionVars.size(); 6031 llvm::APInt ArraySize(/*numBits=*/64, Size); 6032 QualType ArrayRDType = C.getConstantArrayType( 6033 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6034 // kmp_task_red_input_t .rd_input.[Size]; 6035 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6036 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6037 Data.ReductionCopies, Data.ReductionOps); 6038 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6039 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6040 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6041 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6042 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6043 TaskRedInput.getPointer(), Idxs, 6044 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6045 ".rd_input.gep."); 6046 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6047 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6048 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6049 RCG.emitSharedOrigLValue(CGF, Cnt); 6050 llvm::Value *CastedShared = 6051 
CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6052 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6053 // ElemLVal.reduce_orig = &Origs[Cnt]; 6054 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6055 llvm::Value *CastedOrig = 6056 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6057 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6058 RCG.emitAggregateType(CGF, Cnt); 6059 llvm::Value *SizeValInChars; 6060 llvm::Value *SizeVal; 6061 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6062 // We use delayed creation/initialization for VLAs and array sections. It is 6063 // required because runtime does not provide the way to pass the sizes of 6064 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6065 // threadprivate global variables are used to store these values and use 6066 // them in the functions. 6067 bool DelayedCreation = !!SizeVal; 6068 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6069 /*isSigned=*/false); 6070 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6071 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6072 // ElemLVal.reduce_init = init; 6073 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6074 llvm::Value *InitAddr = 6075 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6076 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6077 // ElemLVal.reduce_fini = fini; 6078 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6079 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6080 llvm::Value *FiniAddr = Fini 6081 ? 
CGF.EmitCastToVoidPtr(Fini) 6082 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6083 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6084 // ElemLVal.reduce_comb = comb; 6085 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6086 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6087 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6088 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6089 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6090 // ElemLVal.flags = 0; 6091 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6092 if (DelayedCreation) { 6093 CGF.EmitStoreOfScalar( 6094 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6095 FlagsLVal); 6096 } else 6097 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6098 FlagsLVal.getType()); 6099 } 6100 if (Data.IsReductionWithTaskMod) { 6101 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6102 // is_ws, int num, void *data); 6103 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6104 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6105 CGM.IntTy, /*isSigned=*/true); 6106 llvm::Value *Args[] = { 6107 IdentTLoc, GTid, 6108 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0, 6109 /*isSigned=*/true), 6110 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6111 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6112 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6113 return CGF.EmitRuntimeCall( 6114 OMPBuilder.getOrCreateRuntimeFunction( 6115 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), 6116 Args); 6117 } 6118 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6119 llvm::Value *Args[] = { 6120 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6121 /*isSigned=*/true), 6122 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6123 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6124 CGM.VoidPtrTy)}; 6125 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6126 CGM.getModule(), OMPRTL___kmpc_taskred_init), 6127 Args); 6128 } 6129 6130 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6131 SourceLocation Loc, 6132 bool IsWorksharingReduction) { 6133 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6134 // is_ws, int num, void *data); 6135 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6136 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6137 CGM.IntTy, /*isSigned=*/true); 6138 llvm::Value *Args[] = {IdentTLoc, GTid, 6139 llvm::ConstantInt::get(CGM.IntTy, 6140 IsWorksharingReduction ? 1 : 0, 6141 /*isSigned=*/true)}; 6142 (void)CGF.EmitRuntimeCall( 6143 OMPBuilder.getOrCreateRuntimeFunction( 6144 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), 6145 Args); 6146 } 6147 6148 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6149 SourceLocation Loc, 6150 ReductionCodeGen &RCG, 6151 unsigned N) { 6152 auto Sizes = RCG.getSizes(N); 6153 // Emit threadprivate global variable if the type is non-constant 6154 // (Sizes.second = nullptr). 
6155 if (Sizes.second) { 6156 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6157 /*isSigned=*/false); 6158 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6159 CGF, CGM.getContext().getSizeType(), 6160 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6161 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6162 } 6163 } 6164 6165 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6166 SourceLocation Loc, 6167 llvm::Value *ReductionsPtr, 6168 LValue SharedLVal) { 6169 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6170 // *d); 6171 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6172 CGM.IntTy, 6173 /*isSigned=*/true), 6174 ReductionsPtr, 6175 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6176 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6177 return Address( 6178 CGF.EmitRuntimeCall( 6179 OMPBuilder.getOrCreateRuntimeFunction( 6180 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), 6181 Args), 6182 SharedLVal.getAlignment()); 6183 } 6184 6185 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6186 SourceLocation Loc) { 6187 if (!CGF.HaveInsertPoint()) 6188 return; 6189 6190 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { 6191 OMPBuilder.createTaskwait(CGF.Builder); 6192 } else { 6193 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6194 // global_tid); 6195 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6196 // Ignore return result until untied tasks are supported. 
6197 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6198 CGM.getModule(), OMPRTL___kmpc_omp_taskwait), 6199 Args); 6200 } 6201 6202 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6203 Region->emitUntiedSwitch(CGF); 6204 } 6205 6206 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6207 OpenMPDirectiveKind InnerKind, 6208 const RegionCodeGenTy &CodeGen, 6209 bool HasCancel) { 6210 if (!CGF.HaveInsertPoint()) 6211 return; 6212 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6213 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6214 } 6215 6216 namespace { 6217 enum RTCancelKind { 6218 CancelNoreq = 0, 6219 CancelParallel = 1, 6220 CancelLoop = 2, 6221 CancelSections = 3, 6222 CancelTaskgroup = 4 6223 }; 6224 } // anonymous namespace 6225 6226 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6227 RTCancelKind CancelKind = CancelNoreq; 6228 if (CancelRegion == OMPD_parallel) 6229 CancelKind = CancelParallel; 6230 else if (CancelRegion == OMPD_for) 6231 CancelKind = CancelLoop; 6232 else if (CancelRegion == OMPD_sections) 6233 CancelKind = CancelSections; 6234 else { 6235 assert(CancelRegion == OMPD_taskgroup); 6236 CancelKind = CancelTaskgroup; 6237 } 6238 return CancelKind; 6239 } 6240 6241 void CGOpenMPRuntime::emitCancellationPointCall( 6242 CodeGenFunction &CGF, SourceLocation Loc, 6243 OpenMPDirectiveKind CancelRegion) { 6244 if (!CGF.HaveInsertPoint()) 6245 return; 6246 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6247 // global_tid, kmp_int32 cncl_kind); 6248 if (auto *OMPRegionInfo = 6249 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6250 // For 'cancellation point taskgroup', the task region info may not have a 6251 // cancel. This may instead happen in another adjacent task. 
6252 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6253 llvm::Value *Args[] = { 6254 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6255 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6256 // Ignore return result until untied tasks are supported. 6257 llvm::Value *Result = CGF.EmitRuntimeCall( 6258 OMPBuilder.getOrCreateRuntimeFunction( 6259 CGM.getModule(), OMPRTL___kmpc_cancellationpoint), 6260 Args); 6261 // if (__kmpc_cancellationpoint()) { 6262 // exit from construct; 6263 // } 6264 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6265 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6266 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6267 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6268 CGF.EmitBlock(ExitBB); 6269 // exit from construct; 6270 CodeGenFunction::JumpDest CancelDest = 6271 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6272 CGF.EmitBranchThroughCleanup(CancelDest); 6273 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6274 } 6275 } 6276 } 6277 6278 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6279 const Expr *IfCond, 6280 OpenMPDirectiveKind CancelRegion) { 6281 if (!CGF.HaveInsertPoint()) 6282 return; 6283 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6284 // kmp_int32 cncl_kind); 6285 auto &M = CGM.getModule(); 6286 if (auto *OMPRegionInfo = 6287 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6288 auto &&ThenGen = [this, &M, Loc, CancelRegion, 6289 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) { 6290 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6291 llvm::Value *Args[] = { 6292 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6293 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6294 // Ignore return result until untied tasks are supported. 
6295 llvm::Value *Result = CGF.EmitRuntimeCall( 6296 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); 6297 // if (__kmpc_cancel()) { 6298 // exit from construct; 6299 // } 6300 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6301 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6302 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6303 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6304 CGF.EmitBlock(ExitBB); 6305 // exit from construct; 6306 CodeGenFunction::JumpDest CancelDest = 6307 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6308 CGF.EmitBranchThroughCleanup(CancelDest); 6309 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6310 }; 6311 if (IfCond) { 6312 emitIfClause(CGF, IfCond, ThenGen, 6313 [](CodeGenFunction &, PrePostActionTy &) {}); 6314 } else { 6315 RegionCodeGenTy ThenRCG(ThenGen); 6316 ThenRCG(CGF); 6317 } 6318 } 6319 } 6320 6321 namespace { 6322 /// Cleanup action for uses_allocators support. 
6323 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 6324 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 6325 6326 public: 6327 OMPUsesAllocatorsActionTy( 6328 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 6329 : Allocators(Allocators) {} 6330 void Enter(CodeGenFunction &CGF) override { 6331 if (!CGF.HaveInsertPoint()) 6332 return; 6333 for (const auto &AllocatorData : Allocators) { 6334 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 6335 CGF, AllocatorData.first, AllocatorData.second); 6336 } 6337 } 6338 void Exit(CodeGenFunction &CGF) override { 6339 if (!CGF.HaveInsertPoint()) 6340 return; 6341 for (const auto &AllocatorData : Allocators) { 6342 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 6343 AllocatorData.first); 6344 } 6345 } 6346 }; 6347 } // namespace 6348 6349 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6350 const OMPExecutableDirective &D, StringRef ParentName, 6351 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6352 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6353 assert(!ParentName.empty() && "Invalid target region parent name!"); 6354 HasEmittedTargetRegion = true; 6355 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 6356 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 6357 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 6358 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 6359 if (!D.AllocatorTraits) 6360 continue; 6361 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 6362 } 6363 } 6364 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 6365 CodeGen.setAction(UsesAllocatorAction); 6366 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6367 IsOffloadEntry, CodeGen); 6368 } 6369 6370 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 6371 const Expr *Allocator, 6372 const Expr *AllocatorTraits) { 6373 llvm::Value *ThreadId = 
getThreadID(CGF, Allocator->getExprLoc()); 6374 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 6375 // Use default memspace handle. 6376 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 6377 llvm::Value *NumTraits = llvm::ConstantInt::get( 6378 CGF.IntTy, cast<ConstantArrayType>( 6379 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 6380 ->getSize() 6381 .getLimitedValue()); 6382 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 6383 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6384 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 6385 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 6386 AllocatorTraitsLVal.getBaseInfo(), 6387 AllocatorTraitsLVal.getTBAAInfo()); 6388 llvm::Value *Traits = 6389 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 6390 6391 llvm::Value *AllocatorVal = 6392 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 6393 CGM.getModule(), OMPRTL___kmpc_init_allocator), 6394 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 6395 // Store to allocator. 
CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

/// Emit the call that destroys the allocator named by \p Allocator.
///
/// Loads the current value of the allocator expression, converts it from the
/// expression's type to 'void *', and passes it together with the current
/// thread id (cast to a signed 'int') to __kmpc_destroy_allocator.
///
/// \param CGF Function the code is emitted into.
/// \param Allocator Expression designating the allocator; its location is
/// used for the thread id lookup, the load and the conversion.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  // The value returned by the runtime call is intentionally discarded.
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

/// Outline the 'target' region of \p D into its own function and, when
/// requested, register it as an offload entry.
///
/// \param D Directive whose OMPD_target captured statement is outlined.
/// \param ParentName Name embedded in the generated entry name and passed on
/// to the offload entry registration.
/// \param OutlinedFn [out] Always set to the generated outlined function.
/// \param OutlinedFnID [out] Set only when \p IsOffloadEntry is true: on the
/// device it is the outlined function bitcast to i8*, on the host it is a new
/// constant i8 global named after the entry.
/// \param IsOffloadEntry If false, the function returns right after outlining
/// and nothing is registered.
/// \param CodeGen Region code generator handed to the target region info.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // The stream is scoped so that it is destroyed before EntryFnName is used.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The region is emitted with a fresh CodeGenFunction whose captured
  // statement info is the target region info for this entry name.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.
  // NOTE(review): the linkage actually set below is WeakAnyLinkage.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls, and in either case has no (possible) side effects.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Look through (possibly nested) compound statements in \p Body for the one
/// statement that is not ignorable.
///
/// Ignorable statements are trivial expressions (see isTrivial), asm and null
/// statements, flush/barrier/taskyield directives, and declaration statements
/// whose declarations all pass the check in the lambda below.
///
/// \returns the single remaining child with containers stripped, or nullptr if
/// a compound statement has more than one non-ignorable child or none at all.
/// If \p Body is not a compound statement after stripping containers, that
/// stripped statement is returned.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    // Restart the search inside this compound statement.
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              // Any other declaration must be a variable to be ignorable.
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // A variable is ignorable if it is constexpr, or if it has a
              // trivial or reference type and no initializer or a trivial one.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Strip containers so that a nested compound statement is visited by the
    // next loop iteration.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a single directive nested in the region body.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // The clause expression is emitted with the inner-expression
          // captured statement info installed for the target region.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams construct without a num_teams clause.
        return Bld.getInt32(0);
      }
      // Nested parallel or simd construct: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target teams directives carry the num_teams clause themselves.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining directive kinds fall through to the llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Compute the number of threads implied by the single directive nested
/// directly in \p CS, if there is one.
///
/// - Nested parallel directive: the num_threads clause value (limited to
///   \p DefaultThreadLimitVal when that is non-null), or the default limit /
///   constant 0 when there is no num_threads clause. An applicable 'if'
///   clause turns the result into "cond ? value : 1", or into constant 1 when
///   the condition folds to false.
/// - Nested simd directive: constant 1.
/// - Any other nested directive: \p DefaultThreadLimitVal, which may be null.
/// - No single nested directive: \p DefaultThreadLimitVal, or constant 0 when
///   it is null.
///
/// \param CGF Function the code is emitted into.
/// \param CS Captured statement whose body is inspected.
/// \param DefaultThreadLimitVal Upper bound already computed by the caller, or
/// null if there is none.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the first 'if' clause that has no name modifier or names
        // 'parallel'.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: false means exactly one thread;
            // true leaves CondVal null so the clause has no further effect.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations before evaluating the
            // condition.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Declarations marked OMPCaptureNoInit get storage and
                  // cleanups only; no initializer is emitted for them here.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Take the unsigned minimum of the default limit and num_threads.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    // Some other nested directive: hand back the caller's limit unchanged,
    // which may be null.
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive.
/// Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // ThreadLimitVal is still null here. A null result means the region holds
    // a single nested directive that is neither parallel nor simd, which is
    // examined below.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations before its expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams directive that is not combined with distribute, descend
      // one level and look at the single directive nested inside it. Dir may
      // become null here.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // For a non-simd distribute directive, look inside it for a parallel
      // region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Only a plain nested 'distribute' is looked through here.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // NOTE(review): getNumThreads can return null when ThreadLimitVal is null
    // and the nested directive is neither parallel nor simd.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the first 'if' clause that has no name modifier or names
      // 'parallel'.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With both clauses present use the unsigned minimum of the two values;
      // otherwise use num_threads alone.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining directive kinds fall through to the llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  ///
  /// Counts how many times the OMP_MAP_MEMBER_OF mask must be shifted right
  /// before its lowest bit is set, i.e. the bit index where the field starts.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
class BasePointerInfo {
  /// The base pointer.
  llvm::Value *Ptr = nullptr;
  /// The base declaration that refers to this device pointer, or null if
  /// there is none.
  const ValueDecl *DevPtrDecl = nullptr;

public:
  BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
      : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
  /// Dereferencing yields the stored base pointer value.
  llvm::Value *operator*() const { return Ptr; }
  const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
  void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
};

// Small-vector aliases used as the parallel arrays of MapCombinedInfoTy.
using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
using MapDimArrayTy = SmallVector<uint64_t, 4>;
using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;

/// This structure contains combined information generated for mappable
/// clauses, including base pointers, pointers, sizes, map types, user-defined
/// mappers, and non-contiguous information.
struct MapCombinedInfoTy {
  struct StructNonContiguousInfo {
    bool IsNonContiguous = false;
    MapDimArrayTy Dims;
    MapNonContiguousArrayTy Offsets;
    MapNonContiguousArrayTy Counts;
    MapNonContiguousArrayTy Strides;
  };
  MapExprsArrayTy Exprs;
  MapBaseValuesArrayTy BasePointers;
  MapValuesArrayTy Pointers;
  MapValuesArrayTy Sizes;
  MapFlagsArrayTy Types;
  MapMappersArrayTy Mappers;
  StructNonContiguousInfo NonContigInfo;

  /// Append arrays in \a CurInfo.
  ///
  /// Every array of \a CurInfo is copied to the end of the matching array
  /// here. NonContigInfo.IsNonContiguous is not touched by this function.
  void append(MapCombinedInfoTy &CurInfo) {
    Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
    BasePointers.append(CurInfo.BasePointers.begin(),
                        CurInfo.BasePointers.end());
    Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
    Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
    Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
    Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
    NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                              CurInfo.NonContigInfo.Dims.end());
    NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                 CurInfo.NonContigInfo.Offsets.end());
    NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                CurInfo.NonContigInfo.Counts.end());
    NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                 CurInfo.NonContigInfo.Strides.end());
  }
};

/// Map between a struct and its lowest & highest elements which have been
/// mapped.
/// [ValueDecl *] --> {LE(FieldIndex, Pointer),
///                    HE(FieldIndex, Pointer)}
struct StructRangeInfoTy {
  // Both elements and the base start out as field index 0 / invalid address.
  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
      0, Address::invalid()};
  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
      0, Address::invalid()};
  Address Base = Address::invalid();
  bool IsArraySection = false;
};

private:
/// Kind that defines how a device pointer has to be returned.
struct MapInfo {
  // Bundles one mappable-expression component list with the map type,
  // modifiers and flags given to the constructor below.
  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
  OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
  // NOTE(review): both modifier lists are ArrayRefs, i.e. non-owning views;
  // the referenced storage presumably has to outlive this object - confirm.
  ArrayRef<OpenMPMapModifierKind> MapModifiers;
  ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
  bool ReturnDevicePointer = false;
  bool IsImplicit = false;
  const ValueDecl *Mapper = nullptr;
  const Expr *VarRef = nullptr;
  bool ForDeviceAddr = false;

  MapInfo() = default;
  MapInfo(
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      bool ReturnDevicePointer, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
      bool ForDeviceAddr = false)
      : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
        MotionModifiers(MotionModifiers),
        ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
        Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
};

/// If use_device_ptr or use_device_addr is used on a decl which is a struct
/// member and there is no map information about it, then emission of that
/// entry is deferred until the whole struct has been processed.
struct DeferredDevicePtrEntryTy {
  const Expr *IE = nullptr;
  const ValueDecl *VD = nullptr;
  bool ForDeviceAddr = false;

  DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                           bool ForDeviceAddr)
      : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
};

/// The target directive from where the mappable clauses were extracted. It
/// is either an executable directive or a user-defined mapper directive.
llvm::PointerUnion<const OMPExecutableDirective *,
                   const OMPDeclareMapperDecl *>
    CurDir;

/// Function the directive is being generated for.
CodeGenFunction &CGF;

/// Set of all first private variables in the current directive.
/// bool data is set to true if the variable is implicitly marked as
/// firstprivate, false otherwise.
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

/// Map between device pointer declarations and their expression components.
/// The key value for declarations in 'this' is null.
llvm::DenseMap<
    const ValueDecl *,
    SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
    DevPointersMap;

/// Emit a value holding the size of the storage designated by \p E.
///
/// - Array shaping expression: size of the base's pointee type multiplied by
///   every dimension expression.
/// - Array section: depends on which of lower bound, colon and length are
///   present (see the comments in the body).
/// - Anything else: CGF.getTypeSize of the expression's canonical type, with
///   a reference type replaced by the type it refers to.
llvm::Value *getExprTypeSize(const Expr *E) const {
  QualType ExprTy = E->getType().getCanonicalType();

  // Calculate the size for array shaping expression.
  if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
    llvm::Value *Size =
        CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OAE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                    CGF.getContext().getSizeType(),
                                    SE->getExprLoc());
      Size = CGF.Builder.CreateNUWMul(Size, Sz);
    }
    return Size;
  }

  // Reference types are ignored for mapping purposes.
  if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
    ExprTy = RefTy->getPointeeType().getCanonicalType();

  // Given that an array section is considered a built-in type, we need to
  // do the calculation based on the length of the section instead of relying
  // on CGF.getTypeSize(E->getType()).
  if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
    QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                          OAE->getBase()->IgnoreParenImpCasts())
                          .getCanonicalType();

    // If there is no length associated with the expression and lower bound is
    // not specified too, that means we are using the whole length of the
    // base.
    if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
        !OAE->getLowerBound())
      return CGF.getTypeSize(BaseTy);

    llvm::Value *ElemSize;
    if (const auto *PTy = BaseTy->getAs<PointerType>()) {
      ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
    } else {
      const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
      assert(ATy && "Expecting array type if not a pointer type.");
      ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
    }

    // If we don't have a length at this point, that is because we have an
    // array section with a single element.
    if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
      return ElemSize;

    // Explicit length: size is length * element size.
    if (const Expr *LenExpr = OAE->getLength()) {
      llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
      LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                           CGF.getContext().getSizeType(),
                                           LenExpr->getExprLoc());
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
           OAE->getLowerBound() && "expected array_section[lb:].");
    // Size = sizetype - lb * elemtype;
    llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
    llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
    LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                     CGF.getContext().getSizeType(),
                                     OAE->getLowerBound()->getExprLoc());
    LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
    // Select 0 unless the base size is strictly greater than the lower-bound
    // offset, so the result of the subtraction is only used when it is valid.
    llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
    llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
    LengthVal = CGF.Builder.CreateSelect(
        Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
    return LengthVal;
  }
  return CGF.getTypeSize(ExprTy);
}

/// Return the corresponding bits for a given map clause modifier.
Add 7293 /// a flag marking the map as a pointer if requested. Add a flag marking the 7294 /// map as the first one of a series of maps that relate to the same map 7295 /// expression. 7296 OpenMPOffloadMappingFlags getMapTypeBits( 7297 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7298 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, 7299 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { 7300 OpenMPOffloadMappingFlags Bits = 7301 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7302 switch (MapType) { 7303 case OMPC_MAP_alloc: 7304 case OMPC_MAP_release: 7305 // alloc and release is the default behavior in the runtime library, i.e. 7306 // if we don't pass any bits alloc/release that is what the runtime is 7307 // going to do. Therefore, we don't need to signal anything for these two 7308 // type modifiers. 7309 break; 7310 case OMPC_MAP_to: 7311 Bits |= OMP_MAP_TO; 7312 break; 7313 case OMPC_MAP_from: 7314 Bits |= OMP_MAP_FROM; 7315 break; 7316 case OMPC_MAP_tofrom: 7317 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7318 break; 7319 case OMPC_MAP_delete: 7320 Bits |= OMP_MAP_DELETE; 7321 break; 7322 case OMPC_MAP_unknown: 7323 llvm_unreachable("Unexpected map type!"); 7324 } 7325 if (AddPtrFlag) 7326 Bits |= OMP_MAP_PTR_AND_OBJ; 7327 if (AddIsTargetParamFlag) 7328 Bits |= OMP_MAP_TARGET_PARAM; 7329 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7330 != MapModifiers.end()) 7331 Bits |= OMP_MAP_ALWAYS; 7332 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7333 != MapModifiers.end()) 7334 Bits |= OMP_MAP_CLOSE; 7335 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) 7336 != MapModifiers.end()) 7337 Bits |= OMP_MAP_PRESENT; 7338 if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) 7339 != MotionModifiers.end()) 7340 Bits |= OMP_MAP_PRESENT; 7341 if (IsNonContiguous) 7342 Bits |= OMP_MAP_NON_CONTIG; 7343 return Bits; 7344 } 7345 7346 /// Return true if the provided expression is a 
final array section. A 7347 /// final array section, is one whose length can't be proved to be one. 7348 bool isFinalArraySectionExpression(const Expr *E) const { 7349 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7350 7351 // It is not an array section and therefore not a unity-size one. 7352 if (!OASE) 7353 return false; 7354 7355 // An array section with no colon always refer to a single element. 7356 if (OASE->getColonLocFirst().isInvalid()) 7357 return false; 7358 7359 const Expr *Length = OASE->getLength(); 7360 7361 // If we don't have a length we have to check if the array has size 1 7362 // for this dimension. Also, we should always expect a length if the 7363 // base type is pointer. 7364 if (!Length) { 7365 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7366 OASE->getBase()->IgnoreParenImpCasts()) 7367 .getCanonicalType(); 7368 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7369 return ATy->getSize().getSExtValue() != 1; 7370 // If we don't have a constant dimension length, we have to consider 7371 // the current section as having any size, so it is not necessarily 7372 // unitary. If it happen to be unity size, that's user fault. 7373 return true; 7374 } 7375 7376 // Check if the length evaluates to 1. 7377 Expr::EvalResult Result; 7378 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7379 return true; // Can have more that size 1. 7380 7381 llvm::APSInt ConstLength = Result.Val.getInt(); 7382 return ConstLength.getSExtValue() != 1; 7383 } 7384 7385 /// Generate the base pointers, section pointers, sizes, map type bits, and 7386 /// user-defined mappers (all included in \a CombinedInfo) for the provided 7387 /// map type, map or motion modifiers, and expression components. 7388 /// \a IsFirstComponent should be set to true if the provided set of 7389 /// components is the first associated with a capture. 
7390 void generateInfoForComponentList( 7391 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7392 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, 7393 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7394 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, 7395 bool IsFirstComponentList, bool IsImplicit, 7396 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, 7397 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, 7398 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7399 OverlappedElements = llvm::None) const { 7400 // The following summarizes what has to be generated for each map and the 7401 // types below. The generated information is expressed in this order: 7402 // base pointer, section pointer, size, flags 7403 // (to add to the ones that come from the map type and modifier). 7404 // 7405 // double d; 7406 // int i[100]; 7407 // float *p; 7408 // 7409 // struct S1 { 7410 // int i; 7411 // float f[50]; 7412 // } 7413 // struct S2 { 7414 // int i; 7415 // float f[50]; 7416 // S1 s; 7417 // double *p; 7418 // struct S2 *ps; 7419 // } 7420 // S2 s; 7421 // S2 *ps; 7422 // 7423 // map(d) 7424 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7425 // 7426 // map(i) 7427 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7428 // 7429 // map(i[1:23]) 7430 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7431 // 7432 // map(p) 7433 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7434 // 7435 // map(p[1:24]) 7436 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ 7437 // in unified shared memory mode or for local pointers 7438 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7439 // 7440 // map(s) 7441 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7442 // 7443 // map(s.i) 7444 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7445 // 7446 // map(s.s.f) 7447 // &s, &(s.s.f[0]), 50*sizeof(float), 
TARGET_PARAM | TO | FROM 7448 // 7449 // map(s.p) 7450 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7451 // 7452 // map(to: s.p[:22]) 7453 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7454 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7455 // &(s.p), &(s.p[0]), 22*sizeof(double), 7456 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7457 // (*) alloc space for struct members, only this is a target parameter 7458 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7459 // optimizes this entry out, same in the examples below) 7460 // (***) map the pointee (map: to) 7461 // 7462 // map(s.ps) 7463 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7464 // 7465 // map(from: s.ps->s.i) 7466 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7467 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7468 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7469 // 7470 // map(to: s.ps->ps) 7471 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7472 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7473 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7474 // 7475 // map(s.ps->ps->ps) 7476 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7477 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7478 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7479 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7480 // 7481 // map(to: s.ps->ps->s.f[:22]) 7482 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7483 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7484 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7485 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7486 // 7487 // map(ps) 7488 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7489 // 7490 // map(ps->i) 7491 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7492 // 7493 // map(ps->s.f) 7494 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7495 // 7496 // map(from: ps->p) 7497 // ps, &(ps->p), sizeof(double*), 
TARGET_PARAM | FROM 7498 // 7499 // map(to: ps->p[:22]) 7500 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7501 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7502 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7503 // 7504 // map(ps->ps) 7505 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7506 // 7507 // map(from: ps->ps->s.i) 7508 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7509 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7510 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7511 // 7512 // map(from: ps->ps->ps) 7513 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7514 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7515 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7516 // 7517 // map(ps->ps->ps->ps) 7518 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7519 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7520 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7521 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7522 // 7523 // map(to: ps->ps->ps->s.f[:22]) 7524 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7525 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7526 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7527 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7528 // 7529 // map(to: s.f[:22]) map(from: s.p[:33]) 7530 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7531 // sizeof(double*) (**), TARGET_PARAM 7532 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7533 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7534 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7535 // (*) allocate contiguous space needed to fit all mapped members even if 7536 // we allocate space for members not mapped (in this example, 7537 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7538 // them as well because they fall between &s.f[0] and &s.p) 7539 // 7540 
// map(from: s.f[:22]) map(to: ps->p[:33]) 7541 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7542 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7543 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7544 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7545 // (*) the struct this entry pertains to is the 2nd element in the list of 7546 // arguments, hence MEMBER_OF(2) 7547 // 7548 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7549 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7550 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7551 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7552 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7553 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7554 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7555 // (*) the struct this entry pertains to is the 4th element in the list 7556 // of arguments, hence MEMBER_OF(4) 7557 7558 // Track if the map information being generated is the first for a capture. 7559 bool IsCaptureFirstInfo = IsFirstComponentList; 7560 // When the variable is on a declare target link or in a to clause with 7561 // unified memory, a reference is needed to hold the host/device address 7562 // of the variable. 7563 bool RequiresReference = false; 7564 7565 // Scan the components from the base to the complete expression. 7566 auto CI = Components.rbegin(); 7567 auto CE = Components.rend(); 7568 auto I = CI; 7569 7570 // Track if the map information being generated is the first for a list of 7571 // components. 
7572 bool IsExpressionFirstInfo = true; 7573 bool FirstPointerInComplexData = false; 7574 Address BP = Address::invalid(); 7575 const Expr *AssocExpr = I->getAssociatedExpression(); 7576 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7577 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7578 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7579 7580 if (isa<MemberExpr>(AssocExpr)) { 7581 // The base is the 'this' pointer. The content of the pointer is going 7582 // to be the base of the field being mapped. 7583 BP = CGF.LoadCXXThisAddress(); 7584 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7585 (OASE && 7586 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7587 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7588 } else if (OAShE && 7589 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7590 BP = Address( 7591 CGF.EmitScalarExpr(OAShE->getBase()), 7592 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7593 } else { 7594 // The base is the reference to the variable. 7595 // BP = &Var. 7596 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7597 if (const auto *VD = 7598 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7599 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7600 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7601 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7602 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7603 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7604 RequiresReference = true; 7605 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7606 } 7607 } 7608 } 7609 7610 // If the variable is a pointer and is being dereferenced (i.e. is not 7611 // the last component), the base has to be the pointer itself, not its 7612 // reference. References are ignored for mapping purposes. 
7613 QualType Ty = 7614 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7615 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7616 // No need to generate individual map information for the pointer, it 7617 // can be associated with the combined storage if shared memory mode is 7618 // active or the base declaration is not global variable. 7619 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); 7620 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 7621 !VD || VD->hasLocalStorage()) 7622 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7623 else 7624 FirstPointerInComplexData = true; 7625 ++I; 7626 } 7627 } 7628 7629 // Track whether a component of the list should be marked as MEMBER_OF some 7630 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7631 // in a component list should be marked as MEMBER_OF, all subsequent entries 7632 // do not belong to the base struct. E.g. 7633 // struct S2 s; 7634 // s.ps->ps->ps->f[:] 7635 // (1) (2) (3) (4) 7636 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7637 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7638 // is the pointee of ps(2) which is not member of struct s, so it should not 7639 // be marked as such (it is still PTR_AND_OBJ). 7640 // The variable is initialized to false so that PTR_AND_OBJ entries which 7641 // are not struct members are not considered (e.g. array of pointers to 7642 // data). 7643 bool ShouldBeMemberOf = false; 7644 7645 // Variable keeping track of whether or not we have encountered a component 7646 // in the component list which is a member expression. Useful when we have a 7647 // pointer or a final array section, in which case it is the previous 7648 // component in the list which tells us whether we have a member expression. 7649 // E.g. 
X.f[:] 7650 // While processing the final array section "[:]" it is "f" which tells us 7651 // whether we are dealing with a member of a declared struct. 7652 const MemberExpr *EncounteredME = nullptr; 7653 7654 // Track for the total number of dimension. Start from one for the dummy 7655 // dimension. 7656 uint64_t DimSize = 1; 7657 7658 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; 7659 7660 for (; I != CE; ++I) { 7661 // If the current component is member of a struct (parent struct) mark it. 7662 if (!EncounteredME) { 7663 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7664 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7665 // as MEMBER_OF the parent struct. 7666 if (EncounteredME) { 7667 ShouldBeMemberOf = true; 7668 // Do not emit as complex pointer if this is actually not array-like 7669 // expression. 7670 if (FirstPointerInComplexData) { 7671 QualType Ty = std::prev(I) 7672 ->getAssociatedDeclaration() 7673 ->getType() 7674 .getNonReferenceType(); 7675 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7676 FirstPointerInComplexData = false; 7677 } 7678 } 7679 } 7680 7681 auto Next = std::next(I); 7682 7683 // We need to generate the addresses and sizes if this is the last 7684 // component, if the component is a pointer or if it is an array section 7685 // whose length can't be proved to be one. If this is a pointer, it 7686 // becomes the base address for the following components. 7687 7688 // A final array section, is one whose length can't be proved to be one. 7689 // If the map item is non-contiguous then we don't treat any array section 7690 // as final array section. 7691 bool IsFinalArraySection = 7692 !IsNonContiguous && 7693 isFinalArraySectionExpression(I->getAssociatedExpression()); 7694 7695 // If we have a declaration for the mapping use that, otherwise use 7696 // the base declaration of the map clause. 
7697 const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) 7698 ? I->getAssociatedDeclaration() 7699 : BaseDecl; 7700 7701 // Get information on whether the element is a pointer. Have to do a 7702 // special treatment for array sections given that they are built-in 7703 // types. 7704 const auto *OASE = 7705 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7706 const auto *OAShE = 7707 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7708 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7709 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7710 bool IsPointer = 7711 OAShE || 7712 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7713 .getCanonicalType() 7714 ->isAnyPointerType()) || 7715 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7716 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; 7717 7718 if (OASE) 7719 ++DimSize; 7720 7721 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { 7722 // If this is not the last component, we expect the pointer to be 7723 // associated with an array expression or member expression. 
7724 assert((Next == CE || 7725 isa<MemberExpr>(Next->getAssociatedExpression()) || 7726 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7727 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7728 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || 7729 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7730 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7731 "Unexpected expression"); 7732 7733 Address LB = Address::invalid(); 7734 if (OAShE) { 7735 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7736 CGF.getContext().getTypeAlignInChars( 7737 OAShE->getBase()->getType())); 7738 } else { 7739 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7740 .getAddress(CGF); 7741 } 7742 7743 // If this component is a pointer inside the base struct then we don't 7744 // need to create any entry for it - it will be combined with the object 7745 // it is pointing to into a single PTR_AND_OBJ entry. 7746 bool IsMemberPointerOrAddr = 7747 (IsPointer || ForDeviceAddr) && EncounteredME && 7748 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7749 EncounteredME); 7750 if (!OverlappedElements.empty()) { 7751 // Handle base element with the info for overlapped elements. 7752 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7753 assert(Next == CE && 7754 "Expected last element for the overlapped elements."); 7755 assert(!IsPointer && 7756 "Unexpected base element with the pointer type."); 7757 // Mark the whole struct as the struct that requires allocation on the 7758 // device. 
7759 PartialStruct.LowestElem = {0, LB}; 7760 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7761 I->getAssociatedExpression()->getType()); 7762 Address HB = CGF.Builder.CreateConstGEP( 7763 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7764 CGF.VoidPtrTy), 7765 TypeSize.getQuantity() - 1); 7766 PartialStruct.HighestElem = { 7767 std::numeric_limits<decltype( 7768 PartialStruct.HighestElem.first)>::max(), 7769 HB}; 7770 PartialStruct.Base = BP; 7771 // Emit data for non-overlapped data. 7772 OpenMPOffloadMappingFlags Flags = 7773 OMP_MAP_MEMBER_OF | 7774 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, 7775 /*AddPtrFlag=*/false, 7776 /*AddIsTargetParamFlag=*/false, IsNonContiguous); 7777 LB = BP; 7778 llvm::Value *Size = nullptr; 7779 // Do bitcopy of all non-overlapped structure elements. 7780 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7781 Component : OverlappedElements) { 7782 Address ComponentLB = Address::invalid(); 7783 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7784 Component) { 7785 if (MC.getAssociatedDeclaration()) { 7786 ComponentLB = 7787 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7788 .getAddress(CGF); 7789 Size = CGF.Builder.CreatePtrDiff( 7790 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7791 CGF.EmitCastToVoidPtr(LB.getPointer())); 7792 break; 7793 } 7794 } 7795 assert(Size && "Failed to determine structure size"); 7796 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7797 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7798 CombinedInfo.Pointers.push_back(LB.getPointer()); 7799 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 7800 Size, CGF.Int64Ty, /*isSigned=*/true)); 7801 CombinedInfo.Types.push_back(Flags); 7802 CombinedInfo.Mappers.push_back(nullptr); 7803 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize 7804 : 1); 7805 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7806 } 7807 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7808 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7809 CombinedInfo.Pointers.push_back(LB.getPointer()); 7810 Size = CGF.Builder.CreatePtrDiff( 7811 CGF.EmitCastToVoidPtr( 7812 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7813 CGF.EmitCastToVoidPtr(LB.getPointer())); 7814 CombinedInfo.Sizes.push_back( 7815 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7816 CombinedInfo.Types.push_back(Flags); 7817 CombinedInfo.Mappers.push_back(nullptr); 7818 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7819 : 1); 7820 break; 7821 } 7822 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7823 if (!IsMemberPointerOrAddr || 7824 (Next == CE && MapType != OMPC_MAP_unknown)) { 7825 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); 7826 CombinedInfo.BasePointers.push_back(BP.getPointer()); 7827 CombinedInfo.Pointers.push_back(LB.getPointer()); 7828 CombinedInfo.Sizes.push_back( 7829 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7830 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize 7831 : 1); 7832 7833 // If Mapper is valid, the last component inherits the mapper. 7834 bool HasMapper = Mapper && Next == CE; 7835 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); 7836 7837 // We need to add a pointer flag for each map that comes from the 7838 // same expression except for the first one. We also need to signal 7839 // this map is the first one that relates with the current capture 7840 // (there is a set of entries for each capture). 
7841 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7842 MapType, MapModifiers, MotionModifiers, IsImplicit, 7843 !IsExpressionFirstInfo || RequiresReference || 7844 FirstPointerInComplexData, 7845 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); 7846 7847 if (!IsExpressionFirstInfo) { 7848 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7849 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7850 if (IsPointer) 7851 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7852 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7853 7854 if (ShouldBeMemberOf) { 7855 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7856 // should be later updated with the correct value of MEMBER_OF. 7857 Flags |= OMP_MAP_MEMBER_OF; 7858 // From now on, all subsequent PTR_AND_OBJ entries should not be 7859 // marked as MEMBER_OF. 7860 ShouldBeMemberOf = false; 7861 } 7862 } 7863 7864 CombinedInfo.Types.push_back(Flags); 7865 } 7866 7867 // If we have encountered a member expression so far, keep track of the 7868 // mapped member. If the parent is "*this", then the value declaration 7869 // is nullptr. 
7870 if (EncounteredME) { 7871 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7872 unsigned FieldIndex = FD->getFieldIndex(); 7873 7874 // Update info about the lowest and highest elements for this struct 7875 if (!PartialStruct.Base.isValid()) { 7876 PartialStruct.LowestElem = {FieldIndex, LB}; 7877 if (IsFinalArraySection) { 7878 Address HB = 7879 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) 7880 .getAddress(CGF); 7881 PartialStruct.HighestElem = {FieldIndex, HB}; 7882 } else { 7883 PartialStruct.HighestElem = {FieldIndex, LB}; 7884 } 7885 PartialStruct.Base = BP; 7886 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7887 PartialStruct.LowestElem = {FieldIndex, LB}; 7888 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7889 PartialStruct.HighestElem = {FieldIndex, LB}; 7890 } 7891 } 7892 7893 // Need to emit combined struct for array sections. 7894 if (IsFinalArraySection || IsNonContiguous) 7895 PartialStruct.IsArraySection = true; 7896 7897 // If we have a final array section, we are done with this expression. 7898 if (IsFinalArraySection) 7899 break; 7900 7901 // The pointer becomes the base for the next element. 
7902 if (Next != CE) 7903 BP = LB; 7904 7905 IsExpressionFirstInfo = false; 7906 IsCaptureFirstInfo = false; 7907 FirstPointerInComplexData = false; 7908 } else if (FirstPointerInComplexData) { 7909 QualType Ty = Components.rbegin() 7910 ->getAssociatedDeclaration() 7911 ->getType() 7912 .getNonReferenceType(); 7913 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7914 FirstPointerInComplexData = false; 7915 } 7916 } 7917 7918 if (!IsNonContiguous) 7919 return; 7920 7921 const ASTContext &Context = CGF.getContext(); 7922 7923 // For supporting stride in array section, we need to initialize the first 7924 // dimension size as 1, first offset as 0, and first count as 1 7925 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)}; 7926 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7927 MapValuesArrayTy CurStrides; 7928 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)}; 7929 uint64_t ElementTypeSize; 7930 7931 // Collect Size information for each dimension and get the element size as 7932 // the first Stride. For example, for `int arr[10][10]`, the DimSizes 7933 // should be [10, 10] and the first stride is 4 btyes. 7934 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 7935 Components) { 7936 const Expr *AssocExpr = Component.getAssociatedExpression(); 7937 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7938 7939 if (!OASE) 7940 continue; 7941 7942 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); 7943 auto *CAT = Context.getAsConstantArrayType(Ty); 7944 auto *VAT = Context.getAsVariableArrayType(Ty); 7945 7946 // We need all the dimension size except for the last dimension. 7947 assert((VAT || CAT || &Component == &*Components.begin()) && 7948 "Should be either ConstantArray or VariableArray if not the " 7949 "first Component"); 7950 7951 // Get element size if CurStrides is empty. 
7952 if (CurStrides.empty()) { 7953 const Type *ElementType = nullptr; 7954 if (CAT) 7955 ElementType = CAT->getElementType().getTypePtr(); 7956 else if (VAT) 7957 ElementType = VAT->getElementType().getTypePtr(); 7958 else 7959 assert(&Component == &*Components.begin() && 7960 "Only expect pointer (non CAT or VAT) when this is the " 7961 "first Component"); 7962 // If ElementType is null, then it means the base is a pointer 7963 // (neither CAT nor VAT) and we'll attempt to get ElementType again 7964 // for next iteration. 7965 if (ElementType) { 7966 // For the case that having pointer as base, we need to remove one 7967 // level of indirection. 7968 if (&Component != &*Components.begin()) 7969 ElementType = ElementType->getPointeeOrArrayElementType(); 7970 ElementTypeSize = 7971 Context.getTypeSizeInChars(ElementType).getQuantity(); 7972 CurStrides.push_back( 7973 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize)); 7974 } 7975 } 7976 // Get dimension value except for the last dimension since we don't need 7977 // it. 7978 if (DimSizes.size() < Components.size() - 1) { 7979 if (CAT) 7980 DimSizes.push_back(llvm::ConstantInt::get( 7981 CGF.Int64Ty, CAT->getSize().getZExtValue())); 7982 else if (VAT) 7983 DimSizes.push_back(CGF.Builder.CreateIntCast( 7984 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, 7985 /*IsSigned=*/false)); 7986 } 7987 } 7988 7989 // Skip the dummy dimension since we have already have its information. 7990 auto DI = DimSizes.begin() + 1; 7991 // Product of dimension. 7992 llvm::Value *DimProd = 7993 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); 7994 7995 // Collect info for non-contiguous. Notice that offset, count, and stride 7996 // are only meaningful for array-section, so we insert a null for anything 7997 // other than array-section. 7998 // Also, the size of offset, count, and stride are not the same as 7999 // pointers, base_pointers, sizes, or dims. 
Instead, the size of offset, 8000 // count, and stride are the same as the number of non-contiguous 8001 // declaration in target update to/from clause. 8002 for (const OMPClauseMappableExprCommon::MappableComponent &Component : 8003 Components) { 8004 const Expr *AssocExpr = Component.getAssociatedExpression(); 8005 8006 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) { 8007 llvm::Value *Offset = CGF.Builder.CreateIntCast( 8008 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty, 8009 /*isSigned=*/false); 8010 CurOffsets.push_back(Offset); 8011 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1)); 8012 CurStrides.push_back(CurStrides.back()); 8013 continue; 8014 } 8015 8016 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8017 8018 if (!OASE) 8019 continue; 8020 8021 // Offset 8022 const Expr *OffsetExpr = OASE->getLowerBound(); 8023 llvm::Value *Offset = nullptr; 8024 if (!OffsetExpr) { 8025 // If offset is absent, then we just set it to zero. 8026 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0); 8027 } else { 8028 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr), 8029 CGF.Int64Ty, 8030 /*isSigned=*/false); 8031 } 8032 CurOffsets.push_back(Offset); 8033 8034 // Count 8035 const Expr *CountExpr = OASE->getLength(); 8036 llvm::Value *Count = nullptr; 8037 if (!CountExpr) { 8038 // In Clang, once a high dimension is an array section, we construct all 8039 // the lower dimension as array section, however, for case like 8040 // arr[0:2][2], Clang construct the inner dimension as an array section 8041 // but it actually is not in an array section form according to spec. 8042 if (!OASE->getColonLocFirst().isValid() && 8043 !OASE->getColonLocSecond().isValid()) { 8044 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1); 8045 } else { 8046 // OpenMP 5.0, 2.1.5 Array Sections, Description. 
8047 // When the length is absent it defaults to ⌈(size − 8048 // lower-bound)/stride⌉, where size is the size of the array 8049 // dimension. 8050 const Expr *StrideExpr = OASE->getStride(); 8051 llvm::Value *Stride = 8052 StrideExpr 8053 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8054 CGF.Int64Ty, /*isSigned=*/false) 8055 : nullptr; 8056 if (Stride) 8057 Count = CGF.Builder.CreateUDiv( 8058 CGF.Builder.CreateNUWSub(*DI, Offset), Stride); 8059 else 8060 Count = CGF.Builder.CreateNUWSub(*DI, Offset); 8061 } 8062 } else { 8063 Count = CGF.EmitScalarExpr(CountExpr); 8064 } 8065 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false); 8066 CurCounts.push_back(Count); 8067 8068 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size 8069 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example: 8070 // Offset Count Stride 8071 // D0 0 1 4 (int) <- dummy dimension 8072 // D1 0 2 8 (2 * (1) * 4) 8073 // D2 1 2 20 (1 * (1 * 5) * 4) 8074 // D3 0 2 200 (2 * (1 * 5 * 4) * 4) 8075 const Expr *StrideExpr = OASE->getStride(); 8076 llvm::Value *Stride = 8077 StrideExpr 8078 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr), 8079 CGF.Int64Ty, /*isSigned=*/false) 8080 : nullptr; 8081 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1)); 8082 if (Stride) 8083 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride)); 8084 else 8085 CurStrides.push_back(DimProd); 8086 if (DI != DimSizes.end()) 8087 ++DI; 8088 } 8089 8090 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets); 8091 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts); 8092 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides); 8093 } 8094 8095 /// Return the adjusted map modifiers if the declaration a capture refers to 8096 /// appears in a first-private clause. This is expected to be used only with 8097 /// directives that start with 'target'. 
8098 MappableExprsHandler::OpenMPOffloadMappingFlags 8099 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8100 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8101 8102 // A first private variable captured by reference will use only the 8103 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8104 // declaration is known as first-private in this handler. 8105 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8106 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 8107 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 8108 return MappableExprsHandler::OMP_MAP_ALWAYS | 8109 MappableExprsHandler::OMP_MAP_TO; 8110 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8111 return MappableExprsHandler::OMP_MAP_TO | 8112 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8113 return MappableExprsHandler::OMP_MAP_PRIVATE | 8114 MappableExprsHandler::OMP_MAP_TO; 8115 } 8116 return MappableExprsHandler::OMP_MAP_TO | 8117 MappableExprsHandler::OMP_MAP_FROM; 8118 } 8119 8120 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8121 // Rotate by getFlagMemberOffset() bits. 8122 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8123 << getFlagMemberOffset()); 8124 } 8125 8126 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8127 OpenMPOffloadMappingFlags MemberOfFlag) { 8128 // If the entry is PTR_AND_OBJ but has not been marked with the special 8129 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8130 // marked as MEMBER_OF. 8131 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8132 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8133 return; 8134 8135 // Reset the placeholder value to prepare the flag for the assignment of the 8136 // proper MEMBER_OF value. 
8137 Flags &= ~OMP_MAP_MEMBER_OF; 8138 Flags |= MemberOfFlag; 8139 } 8140 8141 void getPlainLayout(const CXXRecordDecl *RD, 8142 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8143 bool AsBase) const { 8144 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8145 8146 llvm::StructType *St = 8147 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8148 8149 unsigned NumElements = St->getNumElements(); 8150 llvm::SmallVector< 8151 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8152 RecordLayout(NumElements); 8153 8154 // Fill bases. 8155 for (const auto &I : RD->bases()) { 8156 if (I.isVirtual()) 8157 continue; 8158 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8159 // Ignore empty bases. 8160 if (Base->isEmpty() || CGF.getContext() 8161 .getASTRecordLayout(Base) 8162 .getNonVirtualSize() 8163 .isZero()) 8164 continue; 8165 8166 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8167 RecordLayout[FieldIndex] = Base; 8168 } 8169 // Fill in virtual bases. 8170 for (const auto &I : RD->vbases()) { 8171 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8172 // Ignore empty bases. 8173 if (Base->isEmpty()) 8174 continue; 8175 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8176 if (RecordLayout[FieldIndex]) 8177 continue; 8178 RecordLayout[FieldIndex] = Base; 8179 } 8180 // Fill in all the fields. 8181 assert(!RD->isUnion() && "Unexpected union."); 8182 for (const auto *Field : RD->fields()) { 8183 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8184 // will fill in later.) 
8185 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8186 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8187 RecordLayout[FieldIndex] = Field; 8188 } 8189 } 8190 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8191 &Data : RecordLayout) { 8192 if (Data.isNull()) 8193 continue; 8194 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8195 getPlainLayout(Base, Layout, /*AsBase=*/true); 8196 else 8197 Layout.push_back(Data.get<const FieldDecl *>()); 8198 } 8199 } 8200 8201 public: 8202 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8203 : CurDir(&Dir), CGF(CGF) { 8204 // Extract firstprivate clause information. 8205 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8206 for (const auto *D : C->varlists()) 8207 FirstPrivateDecls.try_emplace( 8208 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8209 // Extract implicit firstprivates from uses_allocators clauses. 8210 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8211 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8212 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8213 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8214 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8215 /*Implicit=*/true); 8216 else if (const auto *VD = dyn_cast<VarDecl>( 8217 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8218 ->getDecl())) 8219 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8220 } 8221 } 8222 // Extract device pointer clause information. 8223 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8224 for (auto L : C->component_lists()) 8225 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); 8226 } 8227 8228 /// Constructor for the declare mapper directive. 
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {}

/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
///
/// \param CombinedInfo Receives the combined entry (expression, base pointer,
///        pointer, size, type and a null mapper).
/// \param CurTypes Map flags of the entries generated so far for this
///        declaration; updated in place (TARGET_PARAM is cleared on the first
///        one and MEMBER_OF is set on the entries).
/// \param PartialStruct Base, lowest and highest element of the struct range.
/// \param VD Declaration recorded in CombinedInfo.Exprs for the new entry.
/// \param NotTargetParams If true the combined entry gets OMP_MAP_NONE
///        instead of OMP_MAP_TARGET_PARAM.
void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                       MapFlagsArrayTy &CurTypes,
                       const StructRangeInfoTy &PartialStruct,
                       const ValueDecl *VD = nullptr,
                       bool NotTargetParams = false) const {
  // Nothing to combine: a single entry that does not carry the MEMBER_OF
  // placeholder and is not an array section.
  if (CurTypes.size() == 1 &&
      ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
      !PartialStruct.IsArraySection)
    return;
  CombinedInfo.Exprs.push_back(VD);
  // Base is the base of the struct.
  CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
  // Pointer is the address of the lowest element.
  llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
  CombinedInfo.Pointers.push_back(LB);
  // There should not be a mapper for a combined entry.
  CombinedInfo.Mappers.push_back(nullptr);
  // Size is (addr of {highest+1} element) - (addr of lowest element).
  llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
  llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
  llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
  llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
  llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
  llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                /*isSigned=*/false);
  CombinedInfo.Sizes.push_back(Size);
  // The map type is TARGET_PARAM unless the caller asked for non-target
  // parameters, in which case it is NONE.
  CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                               : OMP_MAP_TARGET_PARAM);
  // If any element has the present modifier, then make sure the runtime
  // doesn't attempt to allocate the struct.
  if (CurTypes.end() !=
      llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
        return Type & OMP_MAP_PRESENT;
      }))
    CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
  // Remove the TARGET_PARAM flag from the first element, if any.
  if (!CurTypes.empty())
    CurTypes.front() &= ~OMP_MAP_TARGET_PARAM;

  // All other current entries will be MEMBER_OF the combined entry
  // (except for PTR_AND_OBJ entries which do not have a placeholder value
  // 0xFFFF in the MEMBER_OF field). The member index is the position of the
  // combined entry that was just pushed onto CombinedInfo.BasePointers.
  OpenMPOffloadMappingFlags MemberOfFlag =
      getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
  for (auto &M : CurTypes)
    setCorrectMemberOfFlag(M, MemberOfFlag);
}

/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted mappable expressions (all included in \a
/// CombinedInfo). Also, for each item that relates to a device pointer, a
/// pair of the relevant declaration and index where it occurs is appended to
/// the device pointers info array.
///
/// \param CombinedInfo Output; entries are appended to it.
/// \param NotTargetParams If true, generated use_device_ptr/use_device_addr
///        entries use OMP_MAP_NONE instead of OMP_MAP_TARGET_PARAM, and no
///        component list is treated as the first one of its declaration.
/// \param SkipVarSet Declarations whose component lists are not recorded.
void generateAllInfo(
    MapCombinedInfoTy &CombinedInfo, bool NotTargetParams = false,
    const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
        llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
  // We have to process the component lists that relate to the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. Lists are keyed by the canonical declaration (null when no
  // declaration is given); declarations in SkipVarSet are ignored.
  auto &&InfoGen =
      [&Info, &SkipVarSet](
          const ValueDecl *D,
          OMPClauseMappableExprCommon::MappableExprComponentListRef L,
          OpenMPMapClauseKind MapType,
          ArrayRef<OpenMPMapModifierKind> MapModifiers,
          ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
          bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
          const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
        const ValueDecl *VD =
            D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
        if (SkipVarSet.count(VD))
          return;
        Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
                              ReturnDevicePointer, IsImplicit, Mapper, VarRef,
                              ForDeviceAddr);
      };

  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // 'map' clauses. EI walks the clause's variable references in step with the
  // component lists.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      // The expression is not correct if the mapping is implicit.
      const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
      InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
              C->getMapTypeModifiers(), llvm::None,
              /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
              E);
      ++EI;
    }
  }
  // 'to' clauses are recorded with map type 'to'.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) {
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L), *EI);
      ++EI;
    }
  }
  // 'from' clauses are recorded with map type 'from'.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) {
    const auto *EI = C->getVarRefs().begin();
    for (const auto L : C->component_lists()) {
      InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
              C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
              C->isImplicit(), std::get<2>(L), *EI);
      ++EI;
    }
  }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;
  MapCombinedInfoTy UseDevicePtrCombinedInfo;

  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
    for (const auto L : C->component_lists()) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
          std::get<1>(L);
      assert(!Components.empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = Components.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = Components.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
          return MI.Components.back().getAssociatedDeclaration() == VD;
        });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        // Exclude cases where the base pointer is mapped as array subscript,
        // array section or array shaping. The base address is passed as a
        // pointer to base in this case and cannot be used as a base for
        // use_device_ptr list item.
        if (CI != It->second.end()) {
          // PrevCI is the component that follows the last one (the list is
          // walked in reverse); it equals rend() for a one-component list.
          auto PrevCI = std::next(CI->Components.rbegin());
          const auto *VarD = dyn_cast<VarDecl>(VD);
          if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
              isa<MemberExpr>(IE) ||
              !VD->getType().getNonReferenceType()->isPointerType() ||
              PrevCI == CI->Components.rend() ||
              isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
              VarD->hasLocalStorage()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
      } else {
        // Zero-size RETURN_PARAM entry whose base and pointer are both the
        // loaded pointer value. It goes into a separate list that is appended
        // to CombinedInfo at the very end of this function.
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        UseDevicePtrCombinedInfo.Exprs.push_back(VD);
        UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
        UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
        UseDevicePtrCombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        UseDevicePtrCombinedInfo.Types.push_back(
            OMP_MAP_RETURN_PARAM |
            (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
        UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  // Look at the use_device_addr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_addr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
    for (const auto L : C->component_lists()) {
      assert(!std::get<1>(L).empty() &&
             "Not expecting empty list of components!");
      const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
      // Handle each declaration only once: skip it if it was already
      // inserted into Processed.
      if (!Processed.insert(VD).second)
        continue;
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) {
          return MI.Components.back().getAssociatedDeclaration() == VD;
        });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        if (CI != It->second.end()) {
          CI->ReturnDevicePointer = true;
          continue;
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
                llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
                nullptr, nullptr, /*ForDeviceAddr=*/true);
        DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
      } else {
        // Zero-size RETURN_PARAM entry on the address of the item (or on its
        // scalar value when the expression is not a glvalue). Unlike the
        // use_device_ptr case, it is appended to CombinedInfo right away.
        llvm::Value *Ptr;
        if (IE->isGLValue())
          Ptr = CGF.EmitLValue(IE).getPointer(CGF);
        else
          Ptr = CGF.EmitScalarExpr(IE);
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.emplace_back(Ptr, VD);
        CombinedInfo.Pointers.push_back(Ptr);
        CombinedInfo.Sizes.push_back(
            llvm::Constant::getNullValue(CGF.Int64Ty));
        CombinedInfo.Types.push_back(
            OMP_MAP_RETURN_PARAM |
            (NotTargetParams ? OMP_MAP_NONE : OMP_MAP_TARGET_PARAM));
        CombinedInfo.Mappers.push_back(nullptr);
      }
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = !NotTargetParams;

    // Underlying variable declaration used in the map clause.
    const ValueDecl *VD = std::get<0>(M);

    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");

      // Remember the current base pointer index.
      unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
      CurInfo.NonContigInfo.IsNonContiguous =
          L.Components.back().isNonContiguous();
      generateInfoForComponentList(
          L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
          PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
          L.ForDeviceAddr, VD, L.VarRef);

      // If this entry relates to a device pointer, set the relevant
      // declaration and add the 'return pointer' flag.
      if (L.ReturnDevicePointer) {
        assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
               "Unexpected number of mapped base pointers.");

        const ValueDecl *RelevantVD =
            L.Components.back().getAssociatedDeclaration();
        assert(RelevantVD &&
               "No relevant declaration related with device pointer??");

        CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
            RelevantVD);
        CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
      }
      IsFirstComponentList = false;
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr or use_device_addr.
    auto CI = DeferredInfo.find(M.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        llvm::Value *BasePtr;
        llvm::Value *Ptr;
        if (L.ForDeviceAddr) {
          if (L.IE->isGLValue())
            Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          else
            Ptr = this->CGF.EmitScalarExpr(L.IE);
          BasePtr = Ptr;
          // Entry is RETURN_PARAM. Also, set the placeholder value
          // MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
        } else {
          // NOTE(review): EmitLValue(L.IE) is evaluated twice here, once for
          // the base address and once for the load - confirm the duplicate
          // emission is intended.
          BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                           L.IE->getExprLoc());
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                  OMP_MAP_MEMBER_OF);
        }
        CurInfo.Exprs.push_back(L.VD);
        CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
        CurInfo.Pointers.push_back(Ptr);
        CurInfo.Sizes.push_back(
            llvm::Constant::getNullValue(this->CGF.Int64Ty));
        CurInfo.Mappers.push_back(nullptr);
      }
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD,
                        NotTargetParams);

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Append data for use_device_ptr clauses.
  CombinedInfo.append(UseDevicePtrCombinedInfo);
}

/// Generate all the base pointers, section pointers, sizes, map types, and
/// mappers for the extracted map clauses of user-defined mapper (all included
/// in \a CombinedInfo).
void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  // We have to process the component lists that relate to the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Fill the information map for map clauses. Every clause of the mapper is
  // cast to OMPMapClause; EI walks the clause's variable references in step
  // with its component lists.
  for (const auto *C : CurMapperDir->clauselists()) {
    const auto *MC = cast<OMPMapClause>(C);
    const auto *EI = MC->getVarRefs().begin();
    for (const auto L : MC->component_lists()) {
      // The expression is not correct if the mapping is implicit.
      const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr;
      // Lists are keyed by the canonical declaration (null if there is none).
      const ValueDecl *VD =
          std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
                         : nullptr;
      // Get the corresponding user-defined mapper.
      Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
                            MC->getMapTypeModifiers(), llvm::None,
                            /*ReturnDevicePointer=*/false, MC->isImplicit(),
                            std::get<2>(L), E);
      ++EI;
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Underlying variable declaration used in the map clause.
    const ValueDecl *VD = std::get<0>(M);

    // Temporary generated information.
    MapCombinedInfoTy CurInfo;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");
      generateInfoForComponentList(
          L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
          PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
          L.ForDeviceAddr, VD, L.VarRef);
      IsFirstComponentList = false;
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      // NOTE(review): presumably the 0 pushed here is the dimension entry
      // matching the extra combined entry - confirm against the consumers of
      // NonContigInfo.Dims.
      CurInfo.NonContigInfo.Dims.push_back(0);
      emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
}

/// Emit capture info for lambdas: one entry for a captured 'this' and one for
/// each variable that is captured by reference or has pointer type.
///
/// Nothing is emitted when the type of \p VD is not a lambda class.
///
/// \param VD Declaration of the lambda object.
/// \param Arg Address of the lambda object.
/// \param CombinedInfo Receives the generated entries.
/// \param LambdaPointers Receives, for every generated entry, a mapping from
///        the address of the capture field to the address of the lambda.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  // Captured 'this': pointer-sized entry based on the capture field.
  if (ThisCapture) {
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    CombinedInfo.Exprs.push_back(VD);
    CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
    CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
    CombinedInfo.Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    const VarDecl *VD = LC.getCapturedVar();
    // Only by-reference captures and captures of pointer type get an entry.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the entry has the size of the referenced
      // (non-reference) type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer captured by copy: the entry points at the loaded pointer
      // value and has size zero.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
      CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    CombinedInfo.Mappers.push_back(nullptr);
  }
}

/// Set correct indices for lambda captures.
///
/// Every entry whose flags are exactly PTR_AND_OBJ | LITERAL | MEMBER_OF |
/// IMPLICIT gets its MEMBER_OF field set to the index of the closest
/// preceding entry whose pointer is the address of the owning lambda, as
/// recorded in \p LambdaPointers.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    // Search backwards from entry I for the entry that maps the lambda.
    int TgtIdx = -1;
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // Replace the MEMBER_OF placeholder of this entry with the index of the
    // parent lambda entry found above.
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes, map types, and
/// mappers associated to a given capture (all included in \a CombinedInfo).
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // We need to know when we generating information for the first component
  const ValueDecl *VD = Cap->capturesThis()
                            ?
nullptr 8768 : Cap->getCapturedVar()->getCanonicalDecl(); 8769 8770 // If this declaration appears in a is_device_ptr clause we just have to 8771 // pass the pointer by value. If it is a reference to a declaration, we just 8772 // pass its value. 8773 if (DevPointersMap.count(VD)) { 8774 CombinedInfo.Exprs.push_back(VD); 8775 CombinedInfo.BasePointers.emplace_back(Arg, VD); 8776 CombinedInfo.Pointers.push_back(Arg); 8777 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8778 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, 8779 /*isSigned=*/true)); 8780 CombinedInfo.Types.push_back( 8781 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) | 8782 OMP_MAP_TARGET_PARAM); 8783 CombinedInfo.Mappers.push_back(nullptr); 8784 return; 8785 } 8786 8787 using MapData = 8788 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8789 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, 8790 const ValueDecl *, const Expr *>; 8791 SmallVector<MapData, 4> DeclComponentLists; 8792 assert(CurDir.is<const OMPExecutableDirective *>() && 8793 "Expect a executable directive"); 8794 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8795 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8796 const auto *EI = C->getVarRefs().begin(); 8797 for (const auto L : C->decl_component_lists(VD)) { 8798 const ValueDecl *VDecl, *Mapper; 8799 // The Expression is not correct if the mapping is implicit 8800 const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; 8801 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8802 std::tie(VDecl, Components, Mapper) = L; 8803 assert(VDecl == VD && "We got information for the wrong declaration??"); 8804 assert(!Components.empty() && 8805 "Not expecting declaration with no component lists."); 8806 DeclComponentLists.emplace_back(Components, C->getMapType(), 8807 C->getMapTypeModifiers(), 8808 C->isImplicit(), Mapper, E); 8809 ++EI; 8810 } 8811 } 8812 8813 // Find overlapping elements (including the offset from the base element). 8814 llvm::SmallDenseMap< 8815 const MapData *, 8816 llvm::SmallVector< 8817 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8818 4> 8819 OverlappedData; 8820 size_t Count = 0; 8821 for (const MapData &L : DeclComponentLists) { 8822 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8823 OpenMPMapClauseKind MapType; 8824 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8825 bool IsImplicit; 8826 const ValueDecl *Mapper; 8827 const Expr *VarRef; 8828 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8829 L; 8830 ++Count; 8831 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8832 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8833 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, 8834 VarRef) = L1; 8835 auto CI = Components.rbegin(); 8836 auto CE = Components.rend(); 8837 auto SI = Components1.rbegin(); 8838 auto SE = Components1.rend(); 8839 for (; CI != CE && SI != SE; ++CI, ++SI) { 8840 if (CI->getAssociatedExpression()->getStmtClass() != 8841 SI->getAssociatedExpression()->getStmtClass()) 8842 break; 8843 // Are we dealing with different variables/fields? 8844 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8845 break; 8846 } 8847 // Found overlapping if, at least for one component, reached the head of 8848 // the components list. 
8849 if (CI == CE || SI == SE) { 8850 assert((CI != CE || SI != SE) && 8851 "Unexpected full match of the mapping components."); 8852 const MapData &BaseData = CI == CE ? L : L1; 8853 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8854 SI == SE ? Components : Components1; 8855 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8856 OverlappedElements.getSecond().push_back(SubData); 8857 } 8858 } 8859 } 8860 // Sort the overlapped elements for each item. 8861 llvm::SmallVector<const FieldDecl *, 4> Layout; 8862 if (!OverlappedData.empty()) { 8863 if (const auto *CRD = 8864 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8865 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8866 else { 8867 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8868 Layout.append(RD->field_begin(), RD->field_end()); 8869 } 8870 } 8871 for (auto &Pair : OverlappedData) { 8872 llvm::sort( 8873 Pair.getSecond(), 8874 [&Layout]( 8875 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8876 OMPClauseMappableExprCommon::MappableExprComponentListRef 8877 Second) { 8878 auto CI = First.rbegin(); 8879 auto CE = First.rend(); 8880 auto SI = Second.rbegin(); 8881 auto SE = Second.rend(); 8882 for (; CI != CE && SI != SE; ++CI, ++SI) { 8883 if (CI->getAssociatedExpression()->getStmtClass() != 8884 SI->getAssociatedExpression()->getStmtClass()) 8885 break; 8886 // Are we dealing with different variables/fields? 8887 if (CI->getAssociatedDeclaration() != 8888 SI->getAssociatedDeclaration()) 8889 break; 8890 } 8891 8892 // Lists contain the same elements. 8893 if (CI == CE && SI == SE) 8894 return false; 8895 8896 // List with less elements is less than list with more elements. 
8897 if (CI == CE || SI == SE) 8898 return CI == CE; 8899 8900 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8901 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8902 if (FD1->getParent() == FD2->getParent()) 8903 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8904 const auto It = 8905 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8906 return FD == FD1 || FD == FD2; 8907 }); 8908 return *It == FD1; 8909 }); 8910 } 8911 8912 // Associated with a capture, because the mapping flags depend on it. 8913 // Go through all of the elements with the overlapped elements. 8914 for (const auto &Pair : OverlappedData) { 8915 const MapData &L = *Pair.getFirst(); 8916 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8917 OpenMPMapClauseKind MapType; 8918 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8919 bool IsImplicit; 8920 const ValueDecl *Mapper; 8921 const Expr *VarRef; 8922 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8923 L; 8924 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8925 OverlappedComponents = Pair.getSecond(); 8926 bool IsFirstComponentList = true; 8927 generateInfoForComponentList( 8928 MapType, MapModifiers, llvm::None, Components, CombinedInfo, 8929 PartialStruct, IsFirstComponentList, IsImplicit, Mapper, 8930 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); 8931 } 8932 // Go through other elements without overlapped elements. 
8933 bool IsFirstComponentList = OverlappedData.empty(); 8934 for (const MapData &L : DeclComponentLists) { 8935 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8936 OpenMPMapClauseKind MapType; 8937 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8938 bool IsImplicit; 8939 const ValueDecl *Mapper; 8940 const Expr *VarRef; 8941 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = 8942 L; 8943 auto It = OverlappedData.find(&L); 8944 if (It == OverlappedData.end()) 8945 generateInfoForComponentList(MapType, MapModifiers, llvm::None, 8946 Components, CombinedInfo, PartialStruct, 8947 IsFirstComponentList, IsImplicit, Mapper, 8948 /*ForDeviceAddr=*/false, VD, VarRef); 8949 IsFirstComponentList = false; 8950 } 8951 } 8952 8953 /// Generate the default map information for a given capture \a CI, 8954 /// record field declaration \a RI and captured value \a CV. 8955 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8956 const FieldDecl &RI, llvm::Value *CV, 8957 MapCombinedInfoTy &CombinedInfo) const { 8958 bool IsImplicit = true; 8959 // Do the default mapping. 8960 if (CI.capturesThis()) { 8961 CombinedInfo.Exprs.push_back(nullptr); 8962 CombinedInfo.BasePointers.push_back(CV); 8963 CombinedInfo.Pointers.push_back(CV); 8964 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8965 CombinedInfo.Sizes.push_back( 8966 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8967 CGF.Int64Ty, /*isSigned=*/true)); 8968 // Default map type. 8969 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8970 } else if (CI.capturesVariableByCopy()) { 8971 const VarDecl *VD = CI.getCapturedVar(); 8972 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 8973 CombinedInfo.BasePointers.push_back(CV); 8974 CombinedInfo.Pointers.push_back(CV); 8975 if (!RI.getType()->isAnyPointerType()) { 8976 // We have to signal to the runtime captures passed by value that are 8977 // not pointers. 
8978 CombinedInfo.Types.push_back(OMP_MAP_LITERAL); 8979 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8980 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8981 } else { 8982 // Pointers are implicitly mapped with a zero size and no flags 8983 // (other than first map that is added for all implicit maps). 8984 CombinedInfo.Types.push_back(OMP_MAP_NONE); 8985 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8986 } 8987 auto I = FirstPrivateDecls.find(VD); 8988 if (I != FirstPrivateDecls.end()) 8989 IsImplicit = I->getSecond(); 8990 } else { 8991 assert(CI.capturesVariable() && "Expected captured reference."); 8992 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8993 QualType ElementType = PtrTy->getPointeeType(); 8994 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( 8995 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8996 // The default map type for a scalar/complex type is 'to' because by 8997 // default the value doesn't have to be retrieved. For an aggregate 8998 // type, the default is 'tofrom'. 8999 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); 9000 const VarDecl *VD = CI.getCapturedVar(); 9001 auto I = FirstPrivateDecls.find(VD); 9002 if (I != FirstPrivateDecls.end() && 9003 VD->getType().isConstant(CGF.getContext())) { 9004 llvm::Constant *Addr = 9005 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 9006 // Copy the value of the original variable to the new global copy. 9007 CGF.Builder.CreateMemCpy( 9008 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 9009 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 9010 CombinedInfo.Sizes.back(), /*IsVolatile=*/false); 9011 // Use new global variable as the base pointers. 
9012 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9013 CombinedInfo.BasePointers.push_back(Addr); 9014 CombinedInfo.Pointers.push_back(Addr); 9015 } else { 9016 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); 9017 CombinedInfo.BasePointers.push_back(CV); 9018 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 9019 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 9020 CV, ElementType, CGF.getContext().getDeclAlign(VD), 9021 AlignmentSource::Decl)); 9022 CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); 9023 } else { 9024 CombinedInfo.Pointers.push_back(CV); 9025 } 9026 } 9027 if (I != FirstPrivateDecls.end()) 9028 IsImplicit = I->getSecond(); 9029 } 9030 // Every default map produces a single argument which is a target parameter. 9031 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; 9032 9033 // Add flag stating this is an implicit map. 9034 if (IsImplicit) 9035 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; 9036 9037 // No user-defined mapper for default mapping. 9038 CombinedInfo.Mappers.push_back(nullptr); 9039 } 9040 }; 9041 } // anonymous namespace 9042 9043 static void emitNonContiguousDescriptor( 9044 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9045 CGOpenMPRuntime::TargetDataInfo &Info) { 9046 CodeGenModule &CGM = CGF.CGM; 9047 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo 9048 &NonContigInfo = CombinedInfo.NonContigInfo; 9049 9050 // Build an array of struct descriptor_dim and then assign it to 9051 // offload_args. 
9052 // 9053 // struct descriptor_dim { 9054 // uint64_t offset; 9055 // uint64_t count; 9056 // uint64_t stride 9057 // }; 9058 ASTContext &C = CGF.getContext(); 9059 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 9060 RecordDecl *RD; 9061 RD = C.buildImplicitRecord("descriptor_dim"); 9062 RD->startDefinition(); 9063 addFieldToRecordDecl(C, RD, Int64Ty); 9064 addFieldToRecordDecl(C, RD, Int64Ty); 9065 addFieldToRecordDecl(C, RD, Int64Ty); 9066 RD->completeDefinition(); 9067 QualType DimTy = C.getRecordType(RD); 9068 9069 enum { OffsetFD = 0, CountFD, StrideFD }; 9070 // We need two index variable here since the size of "Dims" is the same as the 9071 // size of Components, however, the size of offset, count, and stride is equal 9072 // to the size of base declaration that is non-contiguous. 9073 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { 9074 // Skip emitting ir if dimension size is 1 since it cannot be 9075 // non-contiguous. 9076 if (NonContigInfo.Dims[I] == 1) 9077 continue; 9078 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); 9079 QualType ArrayTy = 9080 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); 9081 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 9082 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { 9083 unsigned RevIdx = EE - II - 1; 9084 LValue DimsLVal = CGF.MakeAddrLValue( 9085 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); 9086 // Offset 9087 LValue OffsetLVal = CGF.EmitLValueForField( 9088 DimsLVal, *std::next(RD->field_begin(), OffsetFD)); 9089 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); 9090 // Count 9091 LValue CountLVal = CGF.EmitLValueForField( 9092 DimsLVal, *std::next(RD->field_begin(), CountFD)); 9093 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); 9094 // Stride 9095 LValue StrideLVal = CGF.EmitLValueForField( 9096 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 9097 
CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); 9098 } 9099 // args[I] = &dims 9100 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9101 DimsAddr, CGM.Int8PtrTy); 9102 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9103 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9104 Info.PointersArray, 0, I); 9105 Address PAddr(P, CGF.getPointerAlign()); 9106 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); 9107 ++L; 9108 } 9109 } 9110 9111 /// Emit a string constant containing the names of the values mapped to the 9112 /// offloading runtime library. 9113 llvm::Constant * 9114 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, 9115 MappableExprsHandler::MappingExprInfo &MapExprs) { 9116 llvm::Constant *SrcLocStr; 9117 if (!MapExprs.getMapDecl()) { 9118 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); 9119 } else { 9120 std::string ExprName = ""; 9121 if (MapExprs.getMapExpr()) { 9122 PrintingPolicy P(CGF.getContext().getLangOpts()); 9123 llvm::raw_string_ostream OS(ExprName); 9124 MapExprs.getMapExpr()->printPretty(OS, nullptr, P); 9125 OS.flush(); 9126 } else { 9127 ExprName = MapExprs.getMapDecl()->getNameAsString(); 9128 } 9129 9130 SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); 9131 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 9132 const char *FileName = PLoc.getFilename(); 9133 unsigned Line = PLoc.getLine(); 9134 unsigned Column = PLoc.getColumn(); 9135 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), 9136 Line, Column); 9137 } 9138 9139 return SrcLocStr; 9140 } 9141 9142 /// Emit the arrays used to pass the captures and map information to the 9143 /// offloading runtime library. If there is no map or capture information, 9144 /// return nullptr by reference. 
9145 static void emitOffloadingArrays( 9146 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, 9147 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, 9148 bool IsNonContiguous = false) { 9149 CodeGenModule &CGM = CGF.CGM; 9150 ASTContext &Ctx = CGF.getContext(); 9151 9152 // Reset the array information. 9153 Info.clearArrayInfo(); 9154 Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); 9155 9156 if (Info.NumberOfPtrs) { 9157 // Detect if we have any capture size requiring runtime evaluation of the 9158 // size so that a constant array could be eventually used. 9159 bool hasRuntimeEvaluationCaptureSize = false; 9160 for (llvm::Value *S : CombinedInfo.Sizes) 9161 if (!isa<llvm::Constant>(S)) { 9162 hasRuntimeEvaluationCaptureSize = true; 9163 break; 9164 } 9165 9166 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9167 QualType PointerArrayType = Ctx.getConstantArrayType( 9168 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9169 /*IndexTypeQuals=*/0); 9170 9171 Info.BasePointersArray = 9172 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9173 Info.PointersArray = 9174 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9175 Address MappersArray = 9176 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); 9177 Info.MappersArray = MappersArray.getPointer(); 9178 9179 // If we don't have any VLA types or other types that require runtime 9180 // evaluation, we can use a constant array for the map sizes, otherwise we 9181 // need to fill up the arrays as we do for the pointers. 
9182 QualType Int64Ty = 9183 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9184 if (hasRuntimeEvaluationCaptureSize) { 9185 QualType SizeArrayType = Ctx.getConstantArrayType( 9186 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9187 /*IndexTypeQuals=*/0); 9188 Info.SizesArray = 9189 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9190 } else { 9191 // We expect all the sizes to be constant, so we collect them to create 9192 // a constant array. 9193 SmallVector<llvm::Constant *, 16> ConstSizes; 9194 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { 9195 if (IsNonContiguous && 9196 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { 9197 ConstSizes.push_back(llvm::ConstantInt::get( 9198 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); 9199 } else { 9200 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); 9201 } 9202 } 9203 9204 auto *SizesArrayInit = llvm::ConstantArray::get( 9205 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9206 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9207 auto *SizesArrayGbl = new llvm::GlobalVariable( 9208 CGM.getModule(), SizesArrayInit->getType(), 9209 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9210 SizesArrayInit, Name); 9211 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9212 Info.SizesArray = SizesArrayGbl; 9213 } 9214 9215 // The map types are always constant so we don't need to generate code to 9216 // fill arrays. Instead, we create an array constant. 
9217 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); 9218 llvm::copy(CombinedInfo.Types, Mapping.begin()); 9219 llvm::Constant *MapTypesArrayInit = 9220 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9221 std::string MaptypesName = 9222 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9223 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 9224 CGM.getModule(), MapTypesArrayInit->getType(), 9225 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9226 MapTypesArrayInit, MaptypesName); 9227 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9228 Info.MapTypesArray = MapTypesArrayGbl; 9229 9230 // The information types are only built if there is debug information 9231 // requested. 9232 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { 9233 Info.MapNamesArray = llvm::Constant::getNullValue( 9234 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); 9235 } else { 9236 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { 9237 return emitMappingInformation(CGF, OMPBuilder, MapExpr); 9238 }; 9239 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); 9240 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); 9241 9242 llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( 9243 llvm::ArrayType::get( 9244 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(), 9245 CombinedInfo.Exprs.size()), 9246 InfoMap); 9247 auto *MapNamesArrayGbl = new llvm::GlobalVariable( 9248 CGM.getModule(), MapNamesArrayInit->getType(), 9249 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9250 MapNamesArrayInit, 9251 CGM.getOpenMPRuntime().getName({"offload_mapnames"})); 9252 Info.MapNamesArray = MapNamesArrayGbl; 9253 } 9254 9255 // If there's a present map type modifier, it must not be applied to the end 9256 // of a region, so generate a separate map type array in that case. 
9257 if (Info.separateBeginEndCalls()) { 9258 bool EndMapTypesDiffer = false; 9259 for (uint64_t &Type : Mapping) { 9260 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { 9261 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; 9262 EndMapTypesDiffer = true; 9263 } 9264 } 9265 if (EndMapTypesDiffer) { 9266 MapTypesArrayInit = 9267 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9268 MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9269 MapTypesArrayGbl = new llvm::GlobalVariable( 9270 CGM.getModule(), MapTypesArrayInit->getType(), 9271 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9272 MapTypesArrayInit, MaptypesName); 9273 MapTypesArrayGbl->setUnnamedAddr( 9274 llvm::GlobalValue::UnnamedAddr::Global); 9275 Info.MapTypesArrayEnd = MapTypesArrayGbl; 9276 } 9277 } 9278 9279 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9280 llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; 9281 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9282 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9283 Info.BasePointersArray, 0, I); 9284 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9285 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9286 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9287 CGF.Builder.CreateStore(BPVal, BPAddr); 9288 9289 if (Info.requiresDevicePointerInfo()) 9290 if (const ValueDecl *DevVD = 9291 CombinedInfo.BasePointers[I].getDevicePtrDecl()) 9292 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9293 9294 llvm::Value *PVal = CombinedInfo.Pointers[I]; 9295 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9296 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9297 Info.PointersArray, 0, I); 9298 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9299 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9300 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9301 CGF.Builder.CreateStore(PVal, PAddr); 9302 9303 if (hasRuntimeEvaluationCaptureSize) { 
9304 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9305 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9306 Info.SizesArray, 9307 /*Idx0=*/0, 9308 /*Idx1=*/I); 9309 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9310 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], 9311 CGM.Int64Ty, 9312 /*isSigned=*/true), 9313 SAddr); 9314 } 9315 9316 // Fill up the mapper array. 9317 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 9318 if (CombinedInfo.Mappers[I]) { 9319 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( 9320 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); 9321 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); 9322 Info.HasMapper = true; 9323 } 9324 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); 9325 CGF.Builder.CreateStore(MFunc, MAddr); 9326 } 9327 } 9328 9329 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || 9330 Info.NumberOfPtrs == 0) 9331 return; 9332 9333 emitNonContiguousDescriptor(CGF, CombinedInfo, Info); 9334 } 9335 9336 namespace { 9337 /// Additional arguments for emitOffloadingArraysArgument function. 9338 struct ArgumentsOptions { 9339 bool ForEndCall = false; 9340 ArgumentsOptions() = default; 9341 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} 9342 }; 9343 } // namespace 9344 9345 /// Emit the arguments to be passed to the runtime library based on the 9346 /// arrays of base pointers, pointers, sizes, map types, and mappers. If 9347 /// ForEndCall, emit map types to be passed for the end of the region instead of 9348 /// the beginning. 
9349 static void emitOffloadingArraysArgument( 9350 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9351 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9352 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, 9353 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, 9354 const ArgumentsOptions &Options = ArgumentsOptions()) { 9355 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && 9356 "expected region end call to runtime only when end call is separate"); 9357 CodeGenModule &CGM = CGF.CGM; 9358 if (Info.NumberOfPtrs) { 9359 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9360 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9361 Info.BasePointersArray, 9362 /*Idx0=*/0, /*Idx1=*/0); 9363 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9364 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9365 Info.PointersArray, 9366 /*Idx0=*/0, 9367 /*Idx1=*/0); 9368 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9369 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9370 /*Idx0=*/0, /*Idx1=*/0); 9371 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9372 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9373 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd 9374 : Info.MapTypesArray, 9375 /*Idx0=*/0, 9376 /*Idx1=*/0); 9377 9378 // Only emit the mapper information arrays if debug information is 9379 // requested. 
9380 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) 9381 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9382 else 9383 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9384 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9385 Info.MapNamesArray, 9386 /*Idx0=*/0, 9387 /*Idx1=*/0); 9388 // If there is no user-defined mapper, set the mapper array to nullptr to 9389 // avoid an unnecessary data privatization 9390 if (!Info.HasMapper) 9391 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9392 else 9393 MappersArrayArg = 9394 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); 9395 } else { 9396 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9397 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9398 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9399 MapTypesArrayArg = 9400 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9401 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9402 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9403 } 9404 } 9405 9406 /// Check for inner distribute directive. 
9407 static const OMPExecutableDirective * 9408 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9409 const auto *CS = D.getInnermostCapturedStmt(); 9410 const auto *Body = 9411 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9412 const Stmt *ChildStmt = 9413 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9414 9415 if (const auto *NestedDir = 9416 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9417 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9418 switch (D.getDirectiveKind()) { 9419 case OMPD_target: 9420 if (isOpenMPDistributeDirective(DKind)) 9421 return NestedDir; 9422 if (DKind == OMPD_teams) { 9423 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9424 /*IgnoreCaptured=*/true); 9425 if (!Body) 9426 return nullptr; 9427 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9428 if (const auto *NND = 9429 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9430 DKind = NND->getDirectiveKind(); 9431 if (isOpenMPDistributeDirective(DKind)) 9432 return NND; 9433 } 9434 } 9435 return nullptr; 9436 case OMPD_target_teams: 9437 if (isOpenMPDistributeDirective(DKind)) 9438 return NestedDir; 9439 return nullptr; 9440 case OMPD_target_parallel: 9441 case OMPD_target_simd: 9442 case OMPD_target_parallel_for: 9443 case OMPD_target_parallel_for_simd: 9444 return nullptr; 9445 case OMPD_target_teams_distribute: 9446 case OMPD_target_teams_distribute_simd: 9447 case OMPD_target_teams_distribute_parallel_for: 9448 case OMPD_target_teams_distribute_parallel_for_simd: 9449 case OMPD_parallel: 9450 case OMPD_for: 9451 case OMPD_parallel_for: 9452 case OMPD_parallel_master: 9453 case OMPD_parallel_sections: 9454 case OMPD_for_simd: 9455 case OMPD_parallel_for_simd: 9456 case OMPD_cancel: 9457 case OMPD_cancellation_point: 9458 case OMPD_ordered: 9459 case OMPD_threadprivate: 9460 case OMPD_allocate: 9461 case OMPD_task: 9462 case OMPD_simd: 9463 case OMPD_sections: 9464 
case OMPD_section: 9465 case OMPD_single: 9466 case OMPD_master: 9467 case OMPD_critical: 9468 case OMPD_taskyield: 9469 case OMPD_barrier: 9470 case OMPD_taskwait: 9471 case OMPD_taskgroup: 9472 case OMPD_atomic: 9473 case OMPD_flush: 9474 case OMPD_depobj: 9475 case OMPD_scan: 9476 case OMPD_teams: 9477 case OMPD_target_data: 9478 case OMPD_target_exit_data: 9479 case OMPD_target_enter_data: 9480 case OMPD_distribute: 9481 case OMPD_distribute_simd: 9482 case OMPD_distribute_parallel_for: 9483 case OMPD_distribute_parallel_for_simd: 9484 case OMPD_teams_distribute: 9485 case OMPD_teams_distribute_simd: 9486 case OMPD_teams_distribute_parallel_for: 9487 case OMPD_teams_distribute_parallel_for_simd: 9488 case OMPD_target_update: 9489 case OMPD_declare_simd: 9490 case OMPD_declare_variant: 9491 case OMPD_begin_declare_variant: 9492 case OMPD_end_declare_variant: 9493 case OMPD_declare_target: 9494 case OMPD_end_declare_target: 9495 case OMPD_declare_reduction: 9496 case OMPD_declare_mapper: 9497 case OMPD_taskloop: 9498 case OMPD_taskloop_simd: 9499 case OMPD_master_taskloop: 9500 case OMPD_master_taskloop_simd: 9501 case OMPD_parallel_master_taskloop: 9502 case OMPD_parallel_master_taskloop_simd: 9503 case OMPD_requires: 9504 case OMPD_unknown: 9505 default: 9506 llvm_unreachable("Unexpected directive."); 9507 } 9508 } 9509 9510 return nullptr; 9511 } 9512 9513 /// Emit the user-defined mapper function. The code generation follows the 9514 /// pattern in the example below. 9515 /// \code 9516 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9517 /// void *base, void *begin, 9518 /// int64_t size, int64_t type) { 9519 /// // Allocate space for an array section first. 9520 /// if (size > 1 && !maptype.IsDelete) 9521 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9522 /// size*sizeof(Ty), clearToFrom(type)); 9523 /// // Map members. 
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
///
/// \param D Mapper declaration to emit a function for. Nothing is emitted if
/// \p D already has an entry in UDMMap.
/// \param CGF If non-null, \p D is additionally recorded in FunctionUDMMap
/// under CGF->CurFn.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function is emitted at most once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  // The generated function takes (handle, base, begin, size, type), in that
  // order.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The function name is built from "omp_mapper", the mangled mapped type and
  // the mapper's own name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // NOTE(review): PtrBegin is formed from the address of the BeginArg local
  // (not from the loaded BeginIn value below), and the loop later loads a
  // pointer through PtrCurrent. Confirm this is the intended element
  // addressing when Size > 1.
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // LastBB is the block that carries the loop back-edge. It stays BodyBB
  // unless the per-component loop below opens further blocks.
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the count into the position of the MEMBER_OF field so it can be
  // added directly to a map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    // Despite its name, IsMember is true when the MEMBER_OF field is zero
    // (i.e. not a member); in that case the combine block is skipped.
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // FromBB gets no explicit branch here; the PHI below lists it as a
    // predecessor of EndBB, so EmitBlock(EndBB) is presumably relied on to add
    // the fall-through edge.
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // One incoming value per path into EndBB; the ToElseBB edge is the tofrom
    // case and keeps MemberMapType unchanged.
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Record the finished function so later requests for D reuse it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section.
/// If \a IsInit is true, and \a MapType indicates to not delete this array,
/// array initialization code is generated. If \a IsInit is false, and
/// \a MapType indicates to delete this array, array deletion code is generated.
///
/// \param MapperCGF Code generator of the mapper function being emitted.
/// \param Handle, Base, Begin Passed through unchanged as the first three
/// arguments of the __tgt_push_mapper_component call.
/// \param Size Number of array elements; multiplied by \p ElementSize to form
/// the size argument of the runtime call.
/// \param MapType Map type of the mapper invocation; its OMP_MAP_DELETE bit
/// selects between the init and delete cases, and its OMP_MAP_TO/OMP_MAP_FROM
/// bits are cleared before it is passed on.
/// \param ExitBB Block branched to when no runtime call is needed.
/// \param IsInit Selects initialization (true) or deletion (false).
///
/// On return the builder is still positioned after the runtime call; this
/// function does not emit a terminator for that block.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // NOTE(review): this tests Size >= 1, whereas the pseudocode in the doc
  // comment of emitUserDefinedMapper says `size > 1`. Confirm which is
  // intended.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  // For init the body runs when the delete bit is clear; for deletion it runs
  // when the delete bit is set.
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

/// Return the mapper function recorded in UDMMap for \p D, emitting it first
/// via emitUserDefinedMapper if there is no entry yet.
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  // Look the entry up again rather than reusing I, since emission inserts
  // into UDMMap.
  return UDMMap.lookup(D);
}

/// Emit a call to __kmpc_push_target_tripcount for the loop directive
/// associated with target directive \p D.
///
/// The loop directive is \p D itself when it is both a distribute and a teams
/// directive, otherwise whatever getNestedDistributeDirective finds. Nothing
/// is emitted if no such directive exists or if \p SizeEmitter returns null.
///
/// \param DeviceID Device id value passed to the runtime call.
/// \param SizeEmitter Callback producing the number of iterations.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    // Only push a trip count when the emitter actually produced one.
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
      llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
          Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the host-side code that launches target directive \p D.
///
/// When \p OutlinedFnID is non-null, map information and the offloading
/// arrays are built and one of the __tgt_target[_teams][_nowait]_mapper
/// runtime entry points is called; if that call returns non-zero, \p OutlinedFn
/// is called on the host instead. With an \p IfCond, the offloading path is
/// guarded by emitIfClause with host execution as the else branch. When
/// \p OutlinedFnID is null, only the host call to \p OutlinedFn is emitted.
/// If \p D has a depend or nowait clause, the generated code is wrapped with
/// EmitOMPTargetTaskBasedDirective.
///
/// \param OutlinedFn Host version of the target region; must be non-null.
/// \param OutlinedFnID Region id passed to the runtime, or null.
/// \param IfCond Condition of the 'if' clause, or null.
/// \param Device Device clause expression together with its modifier.
/// \param SizeEmitter Forwarded to emitTargetNumIterationsCall.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo, MapTypesArray and MapNamesArray are filled in by TargetThenGen
  // and read by ThenGen, which captures them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
                    &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer(),
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait
                                   ? OMPRTL___tgt_target_teams_nowait_mapper
                                   : OMPRTL___tgt_target_teams_mapper),
          OffloadingArgs);
    } else {
      // Same argument list as above, without the team and thread counts.
      llvm::Value *OffloadingArgs[] = {RTLoc,
                                       DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       MapNamesArray,
                                       InputInfo.MappersArray.getPointer()};
      Return = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
                                         : OMPRTL___tgt_target_mapper),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value from the runtime call is treated as failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Collects the map information for every capture, builds the offloading
  // arrays, publishes them through InputInfo/MapTypesArray/MapNamesArray and
  // then runs ThenGen.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // RI (record fields) and CV (captured values) advance in lockstep with the
    // capture iterator CI.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        // A 'this' capture is recorded in the set as a null declaration.
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct);

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, /*NotTargetParams=*/true,
                              MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndTask=*/false});

    // Publish the results where ThenGen reads them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: runs ElseGen, with an empty OMPTargetDataInfo when a task
  // wrapper is required.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively walk \p S looking for target execution directives and emit the
/// device function for each one that has a target region entry registered in
/// OffloadEntriesInfoManager under \p ParentName.
///
/// Other executable directives are searched through their raw statement,
/// lambda expressions through their body, and any other statement through its
/// children. A null \p S is ignored.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter that matches the directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // Every remaining kind falls through to llvm_unreachable: this switch is
    // only entered when isOpenMPTargetExecutionDirective returned true for the
    // directive kind.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Non-target executable directive: continue the search in its raw statement,
  // if it has an associated statement at all.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Process the function or value declaration \p GD for OpenMP target code
/// generation.
///
/// When compiling for the host, only the device_type of a function is
/// inspected. When compiling for the device, the body of a function is first
/// scanned for target regions (see scanForTargetRegionsFunctions).
///
/// \return true in the cases the comments below describe as "do not emit",
/// false otherwise (presumably this tells the caller to skip the normal
/// emission of \p GD; confirm against the caller).
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the function if it is not marked as declare target and has not
  // already been emitted as a target declaration.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
10403 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10404 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10405 cast<VarDecl>(GD.getDecl())); 10406 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10407 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10408 HasRequiresUnifiedSharedMemory)) { 10409 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10410 return true; 10411 } 10412 return false; 10413 } 10414 10415 llvm::Constant * 10416 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10417 const VarDecl *VD) { 10418 assert(VD->getType().isConstant(CGM.getContext()) && 10419 "Expected constant variable."); 10420 StringRef VarName; 10421 llvm::Constant *Addr; 10422 llvm::GlobalValue::LinkageTypes Linkage; 10423 QualType Ty = VD->getType(); 10424 SmallString<128> Buffer; 10425 { 10426 unsigned DeviceID; 10427 unsigned FileID; 10428 unsigned Line; 10429 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10430 FileID, Line); 10431 llvm::raw_svector_ostream OS(Buffer); 10432 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10433 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10434 VarName = OS.str(); 10435 } 10436 Linkage = llvm::GlobalValue::InternalLinkage; 10437 Addr = 10438 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10439 getDefaultFirstprivateAddressSpace()); 10440 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10441 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10442 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10443 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10444 VarName, Addr, VarSize, 10445 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10446 return Addr; 10447 } 10448 10449 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10450 llvm::Constant *Addr) { 10451 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10452 
!CGM.getLangOpts().OpenMPIsDevice) 10453 return; 10454 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10455 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10456 if (!Res) { 10457 if (CGM.getLangOpts().OpenMPIsDevice) { 10458 // Register non-target variables being emitted in device code (debug info 10459 // may cause this). 10460 StringRef VarName = CGM.getMangledName(VD); 10461 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10462 } 10463 return; 10464 } 10465 // Register declare target variables. 10466 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10467 StringRef VarName; 10468 CharUnits VarSize; 10469 llvm::GlobalValue::LinkageTypes Linkage; 10470 10471 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10472 !HasRequiresUnifiedSharedMemory) { 10473 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10474 VarName = CGM.getMangledName(VD); 10475 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10476 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10477 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10478 } else { 10479 VarSize = CharUnits::Zero(); 10480 } 10481 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10482 // Temp solution to prevent optimizations of the internal variables. 
10483 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10484 std::string RefName = getName({VarName, "ref"}); 10485 if (!CGM.GetGlobalValue(RefName)) { 10486 llvm::Constant *AddrRef = 10487 getOrCreateInternalVariable(Addr->getType(), RefName); 10488 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10489 GVAddrRef->setConstant(/*Val=*/true); 10490 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10491 GVAddrRef->setInitializer(Addr); 10492 CGM.addCompilerUsedGlobal(GVAddrRef); 10493 } 10494 } 10495 } else { 10496 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10497 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10498 HasRequiresUnifiedSharedMemory)) && 10499 "Declare target attribute must link or to with unified memory."); 10500 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10501 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10502 else 10503 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10504 10505 if (CGM.getLangOpts().OpenMPIsDevice) { 10506 VarName = Addr->getName(); 10507 Addr = nullptr; 10508 } else { 10509 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10510 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10511 } 10512 VarSize = CGM.getPointerSize(); 10513 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10514 } 10515 10516 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10517 VarName, Addr, VarSize, Flags, Linkage); 10518 } 10519 10520 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10521 if (isa<FunctionDecl>(GD.getDecl()) || 10522 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10523 return emitTargetFunctions(GD); 10524 10525 return emitTargetGlobalVariable(GD); 10526 } 10527 10528 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10529 for (const VarDecl *VD : DeferredGlobalVariables) { 10530 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10531 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10532 if (!Res) 
10533 continue; 10534 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10535 !HasRequiresUnifiedSharedMemory) { 10536 CGM.EmitGlobal(VD); 10537 } else { 10538 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10539 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10540 HasRequiresUnifiedSharedMemory)) && 10541 "Expected link clause or to clause with unified memory."); 10542 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10543 } 10544 } 10545 } 10546 10547 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10548 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10549 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10550 " Expected target-based directive."); 10551 } 10552 10553 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10554 for (const OMPClause *Clause : D->clauselists()) { 10555 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10556 HasRequiresUnifiedSharedMemory = true; 10557 } else if (const auto *AC = 10558 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10559 switch (AC->getAtomicDefaultMemOrderKind()) { 10560 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10561 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10562 break; 10563 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10564 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10565 break; 10566 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10567 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10568 break; 10569 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10570 break; 10571 } 10572 } 10573 } 10574 } 10575 10576 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10577 return RequiresAtomicOrdering; 10578 } 10579 10580 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10581 LangAS &AS) { 10582 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10583 return false; 10584 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10585 
switch(A->getAllocatorType()) { 10586 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10587 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10588 // Not supported, fallback to the default mem space. 10589 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10590 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10591 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10592 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10593 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10594 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10595 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10596 AS = LangAS::Default; 10597 return true; 10598 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10599 llvm_unreachable("Expected predefined allocator for the variables with the " 10600 "static storage."); 10601 } 10602 return false; 10603 } 10604 10605 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10606 return HasRequiresUnifiedSharedMemory; 10607 } 10608 10609 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10610 CodeGenModule &CGM) 10611 : CGM(CGM) { 10612 if (CGM.getLangOpts().OpenMPIsDevice) { 10613 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10614 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10615 } 10616 } 10617 10618 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10619 if (CGM.getLangOpts().OpenMPIsDevice) 10620 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10621 } 10622 10623 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10624 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10625 return true; 10626 10627 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10628 // Do not to emit function if it is marked as declare target as it was already 10629 // emitted. 
10630 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10631 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10632 if (auto *F = dyn_cast_or_null<llvm::Function>( 10633 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10634 return !F->isDeclaration(); 10635 return false; 10636 } 10637 return true; 10638 } 10639 10640 return !AlreadyEmittedTargetDecls.insert(D).second; 10641 } 10642 10643 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10644 // If we don't have entries or if we are emitting code for the device, we 10645 // don't need to do anything. 10646 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10647 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10648 (OffloadEntriesInfoManager.empty() && 10649 !HasEmittedDeclareTargetRegion && 10650 !HasEmittedTargetRegion)) 10651 return nullptr; 10652 10653 // Create and register the function that handles the requires directives. 10654 ASTContext &C = CGM.getContext(); 10655 10656 llvm::Function *RequiresRegFn; 10657 { 10658 CodeGenFunction CGF(CGM); 10659 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10660 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10661 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10662 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); 10663 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10664 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10665 // TODO: check for other requires clauses. 10666 // The requires directive takes effect only when a target region is 10667 // present in the compilation unit. Otherwise it is ignored and not 10668 // passed to the runtime. This avoids the runtime from throwing an error 10669 // for mismatching requires clauses across compilation units that don't 10670 // contain at least 1 target region. 
10671 assert((HasEmittedTargetRegion || 10672 HasEmittedDeclareTargetRegion || 10673 !OffloadEntriesInfoManager.empty()) && 10674 "Target or declare target region expected."); 10675 if (HasRequiresUnifiedSharedMemory) 10676 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10677 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10678 CGM.getModule(), OMPRTL___tgt_register_requires), 10679 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10680 CGF.FinishFunction(); 10681 } 10682 return RequiresRegFn; 10683 } 10684 10685 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10686 const OMPExecutableDirective &D, 10687 SourceLocation Loc, 10688 llvm::Function *OutlinedFn, 10689 ArrayRef<llvm::Value *> CapturedVars) { 10690 if (!CGF.HaveInsertPoint()) 10691 return; 10692 10693 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10694 CodeGenFunction::RunCleanupsScope Scope(CGF); 10695 10696 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10697 llvm::Value *Args[] = { 10698 RTLoc, 10699 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10700 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10701 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10702 RealArgs.append(std::begin(Args), std::end(Args)); 10703 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10704 10705 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( 10706 CGM.getModule(), OMPRTL___kmpc_fork_teams); 10707 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10708 } 10709 10710 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10711 const Expr *NumTeams, 10712 const Expr *ThreadLimit, 10713 SourceLocation Loc) { 10714 if (!CGF.HaveInsertPoint()) 10715 return; 10716 10717 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10718 10719 llvm::Value *NumTeamsVal = 10720 NumTeams 10721 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10722 CGF.CGM.Int32Ty, /* isSigned = */ true) 10723 : CGF.Builder.getInt32(0); 10724 10725 llvm::Value *ThreadLimitVal = 10726 ThreadLimit 10727 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10728 CGF.CGM.Int32Ty, /* isSigned = */ true) 10729 : CGF.Builder.getInt32(0); 10730 10731 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10732 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10733 ThreadLimitVal}; 10734 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( 10735 CGM.getModule(), OMPRTL___kmpc_push_num_teams), 10736 PushNumTeamsArgs); 10737 } 10738 10739 void CGOpenMPRuntime::emitTargetDataCalls( 10740 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10741 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10742 if (!CGF.HaveInsertPoint()) 10743 return; 10744 10745 // Action used to replace the default codegen action and turn privatization 10746 // off. 10747 PrePostActionTy NoPrivAction; 10748 10749 // Generate the code for the opening of the data environment. Capture all the 10750 // arguments of the runtime call by reference because they are used in the 10751 // closing of the region. 10752 auto &&BeginThenGen = [this, &D, Device, &Info, 10753 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10754 // Fill up the arrays with all the mapped variables. 10755 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 10756 10757 // Get map clause information. 10758 MappableExprsHandler MEHandler(D, CGF); 10759 MEHandler.generateAllInfo(CombinedInfo); 10760 10761 // Fill up the arrays and create the arguments. 
10762 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 10763 /*IsNonContiguous=*/true); 10764 10765 llvm::Value *BasePointersArrayArg = nullptr; 10766 llvm::Value *PointersArrayArg = nullptr; 10767 llvm::Value *SizesArrayArg = nullptr; 10768 llvm::Value *MapTypesArrayArg = nullptr; 10769 llvm::Value *MapNamesArrayArg = nullptr; 10770 llvm::Value *MappersArrayArg = nullptr; 10771 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10772 SizesArrayArg, MapTypesArrayArg, 10773 MapNamesArrayArg, MappersArrayArg, Info); 10774 10775 // Emit device ID if any. 10776 llvm::Value *DeviceID = nullptr; 10777 if (Device) { 10778 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10779 CGF.Int64Ty, /*isSigned=*/true); 10780 } else { 10781 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10782 } 10783 10784 // Emit the number of elements in the offloading arrays. 10785 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10786 // 10787 // Source location for the ident struct 10788 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10789 10790 llvm::Value *OffloadingArgs[] = {RTLoc, 10791 DeviceID, 10792 PointerNum, 10793 BasePointersArrayArg, 10794 PointersArrayArg, 10795 SizesArrayArg, 10796 MapTypesArrayArg, 10797 MapNamesArrayArg, 10798 MappersArrayArg}; 10799 CGF.EmitRuntimeCall( 10800 OMPBuilder.getOrCreateRuntimeFunction( 10801 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), 10802 OffloadingArgs); 10803 10804 // If device pointer privatization is required, emit the body of the region 10805 // here. It will have to be duplicated: with and without privatization. 10806 if (!Info.CaptureDeviceAddrMap.empty()) 10807 CodeGen(CGF); 10808 }; 10809 10810 // Generate code for the closing of the data region. 
10811 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, 10812 PrePostActionTy &) { 10813 assert(Info.isValid() && "Invalid data environment closing arguments."); 10814 10815 llvm::Value *BasePointersArrayArg = nullptr; 10816 llvm::Value *PointersArrayArg = nullptr; 10817 llvm::Value *SizesArrayArg = nullptr; 10818 llvm::Value *MapTypesArrayArg = nullptr; 10819 llvm::Value *MapNamesArrayArg = nullptr; 10820 llvm::Value *MappersArrayArg = nullptr; 10821 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10822 SizesArrayArg, MapTypesArrayArg, 10823 MapNamesArrayArg, MappersArrayArg, Info, 10824 {/*ForEndCall=*/true}); 10825 10826 // Emit device ID if any. 10827 llvm::Value *DeviceID = nullptr; 10828 if (Device) { 10829 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10830 CGF.Int64Ty, /*isSigned=*/true); 10831 } else { 10832 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10833 } 10834 10835 // Emit the number of elements in the offloading arrays. 10836 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10837 10838 // Source location for the ident struct 10839 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10840 10841 llvm::Value *OffloadingArgs[] = {RTLoc, 10842 DeviceID, 10843 PointerNum, 10844 BasePointersArrayArg, 10845 PointersArrayArg, 10846 SizesArrayArg, 10847 MapTypesArrayArg, 10848 MapNamesArrayArg, 10849 MappersArrayArg}; 10850 CGF.EmitRuntimeCall( 10851 OMPBuilder.getOrCreateRuntimeFunction( 10852 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), 10853 OffloadingArgs); 10854 }; 10855 10856 // If we need device pointer privatization, we need to emit the body of the 10857 // region with no privatization in the 'else' branch of the conditional. 10858 // Otherwise, we don't have to do anything. 
10859 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10860 PrePostActionTy &) { 10861 if (!Info.CaptureDeviceAddrMap.empty()) { 10862 CodeGen.setAction(NoPrivAction); 10863 CodeGen(CGF); 10864 } 10865 }; 10866 10867 // We don't have to do anything to close the region if the if clause evaluates 10868 // to false. 10869 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10870 10871 if (IfCond) { 10872 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10873 } else { 10874 RegionCodeGenTy RCG(BeginThenGen); 10875 RCG(CGF); 10876 } 10877 10878 // If we don't require privatization of device pointers, we emit the body in 10879 // between the runtime calls. This avoids duplicating the body code. 10880 if (Info.CaptureDeviceAddrMap.empty()) { 10881 CodeGen.setAction(NoPrivAction); 10882 CodeGen(CGF); 10883 } 10884 10885 if (IfCond) { 10886 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10887 } else { 10888 RegionCodeGenTy RCG(EndThenGen); 10889 RCG(CGF); 10890 } 10891 } 10892 10893 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10894 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10895 const Expr *Device) { 10896 if (!CGF.HaveInsertPoint()) 10897 return; 10898 10899 assert((isa<OMPTargetEnterDataDirective>(D) || 10900 isa<OMPTargetExitDataDirective>(D) || 10901 isa<OMPTargetUpdateDirective>(D)) && 10902 "Expecting either target enter, exit data, or update directives."); 10903 10904 CodeGenFunction::OMPTargetDataInfo InputInfo; 10905 llvm::Value *MapTypesArray = nullptr; 10906 llvm::Value *MapNamesArray = nullptr; 10907 // Generate the code for the opening of the data environment. 10908 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray, 10909 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10910 // Emit device ID if any. 
10911 llvm::Value *DeviceID = nullptr; 10912 if (Device) { 10913 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10914 CGF.Int64Ty, /*isSigned=*/true); 10915 } else { 10916 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10917 } 10918 10919 // Emit the number of elements in the offloading arrays. 10920 llvm::Constant *PointerNum = 10921 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10922 10923 // Source location for the ident struct 10924 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 10925 10926 llvm::Value *OffloadingArgs[] = {RTLoc, 10927 DeviceID, 10928 PointerNum, 10929 InputInfo.BasePointersArray.getPointer(), 10930 InputInfo.PointersArray.getPointer(), 10931 InputInfo.SizesArray.getPointer(), 10932 MapTypesArray, 10933 MapNamesArray, 10934 InputInfo.MappersArray.getPointer()}; 10935 10936 // Select the right runtime function call for each standalone 10937 // directive. 10938 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10939 RuntimeFunction RTLFn; 10940 switch (D.getDirectiveKind()) { 10941 case OMPD_target_enter_data: 10942 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper 10943 : OMPRTL___tgt_target_data_begin_mapper; 10944 break; 10945 case OMPD_target_exit_data: 10946 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper 10947 : OMPRTL___tgt_target_data_end_mapper; 10948 break; 10949 case OMPD_target_update: 10950 RTLFn = HasNowait ? 
OMPRTL___tgt_target_data_update_nowait_mapper 10951 : OMPRTL___tgt_target_data_update_mapper; 10952 break; 10953 case OMPD_parallel: 10954 case OMPD_for: 10955 case OMPD_parallel_for: 10956 case OMPD_parallel_master: 10957 case OMPD_parallel_sections: 10958 case OMPD_for_simd: 10959 case OMPD_parallel_for_simd: 10960 case OMPD_cancel: 10961 case OMPD_cancellation_point: 10962 case OMPD_ordered: 10963 case OMPD_threadprivate: 10964 case OMPD_allocate: 10965 case OMPD_task: 10966 case OMPD_simd: 10967 case OMPD_sections: 10968 case OMPD_section: 10969 case OMPD_single: 10970 case OMPD_master: 10971 case OMPD_critical: 10972 case OMPD_taskyield: 10973 case OMPD_barrier: 10974 case OMPD_taskwait: 10975 case OMPD_taskgroup: 10976 case OMPD_atomic: 10977 case OMPD_flush: 10978 case OMPD_depobj: 10979 case OMPD_scan: 10980 case OMPD_teams: 10981 case OMPD_target_data: 10982 case OMPD_distribute: 10983 case OMPD_distribute_simd: 10984 case OMPD_distribute_parallel_for: 10985 case OMPD_distribute_parallel_for_simd: 10986 case OMPD_teams_distribute: 10987 case OMPD_teams_distribute_simd: 10988 case OMPD_teams_distribute_parallel_for: 10989 case OMPD_teams_distribute_parallel_for_simd: 10990 case OMPD_declare_simd: 10991 case OMPD_declare_variant: 10992 case OMPD_begin_declare_variant: 10993 case OMPD_end_declare_variant: 10994 case OMPD_declare_target: 10995 case OMPD_end_declare_target: 10996 case OMPD_declare_reduction: 10997 case OMPD_declare_mapper: 10998 case OMPD_taskloop: 10999 case OMPD_taskloop_simd: 11000 case OMPD_master_taskloop: 11001 case OMPD_master_taskloop_simd: 11002 case OMPD_parallel_master_taskloop: 11003 case OMPD_parallel_master_taskloop_simd: 11004 case OMPD_target: 11005 case OMPD_target_simd: 11006 case OMPD_target_teams_distribute: 11007 case OMPD_target_teams_distribute_simd: 11008 case OMPD_target_teams_distribute_parallel_for: 11009 case OMPD_target_teams_distribute_parallel_for_simd: 11010 case OMPD_target_teams: 11011 case 
OMPD_target_parallel: 11012 case OMPD_target_parallel_for: 11013 case OMPD_target_parallel_for_simd: 11014 case OMPD_requires: 11015 case OMPD_unknown: 11016 default: 11017 llvm_unreachable("Unexpected standalone target data directive."); 11018 break; 11019 } 11020 CGF.EmitRuntimeCall( 11021 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), 11022 OffloadingArgs); 11023 }; 11024 11025 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 11026 &MapNamesArray](CodeGenFunction &CGF, 11027 PrePostActionTy &) { 11028 // Fill up the arrays with all the mapped variables. 11029 MappableExprsHandler::MapCombinedInfoTy CombinedInfo; 11030 11031 // Get map clause information. 11032 MappableExprsHandler MEHandler(D, CGF); 11033 MEHandler.generateAllInfo(CombinedInfo); 11034 11035 TargetDataInfo Info; 11036 // Fill up the arrays and create the arguments. 11037 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, 11038 /*IsNonContiguous=*/true); 11039 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || 11040 D.hasClausesOfKind<OMPNowaitClause>(); 11041 emitOffloadingArraysArgument( 11042 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, 11043 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, 11044 {/*ForEndTask=*/false}); 11045 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11046 InputInfo.BasePointersArray = 11047 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11048 InputInfo.PointersArray = 11049 Address(Info.PointersArray, CGM.getPointerAlign()); 11050 InputInfo.SizesArray = 11051 Address(Info.SizesArray, CGM.getPointerAlign()); 11052 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); 11053 MapTypesArray = Info.MapTypesArray; 11054 MapNamesArray = Info.MapNamesArray; 11055 if (RequiresOuterTask) 11056 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11057 else 11058 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11059 }; 11060 11061 if 
(IfCond) { 11062 emitIfClause(CGF, IfCond, TargetThenGen, 11063 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11064 } else { 11065 RegionCodeGenTy ThenRCG(TargetThenGen); 11066 ThenRCG(CGF); 11067 } 11068 } 11069 11070 namespace { 11071 /// Kind of parameter in a function with 'declare simd' directive. 11072 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11073 /// Attribute set of the parameter. 11074 struct ParamAttrTy { 11075 ParamKindTy Kind = Vector; 11076 llvm::APSInt StrideOrArg; 11077 llvm::APSInt Alignment; 11078 }; 11079 } // namespace 11080 11081 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11082 ArrayRef<ParamAttrTy> ParamAttrs) { 11083 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11084 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11085 // of that clause. The VLEN value must be power of 2. 11086 // In other case the notion of the function`s "characteristic data type" (CDT) 11087 // is used to compute the vector length. 11088 // CDT is defined in the following order: 11089 // a) For non-void function, the CDT is the return type. 11090 // b) If the function has any non-uniform, non-linear parameters, then the 11091 // CDT is the type of the first such parameter. 11092 // c) If the CDT determined by a) or b) above is struct, union, or class 11093 // type which is pass-by-value (except for the type that maps to the 11094 // built-in complex data type), the characteristic data type is int. 11095 // d) If none of the above three cases is applicable, the CDT is int. 11096 // The VLEN is then determined based on the CDT and the size of vector 11097 // register of that ISA for which current vector version is generated. The 11098 // VLEN is computed using the formula below: 11099 // VLEN = sizeof(vector_register) / sizeof(CDT), 11100 // where vector register size specified in section 3.2.1 Registers and the 11101 // Stack Frame of original AMD64 ABI document. 
11102 QualType RetType = FD->getReturnType(); 11103 if (RetType.isNull()) 11104 return 0; 11105 ASTContext &C = FD->getASTContext(); 11106 QualType CDT; 11107 if (!RetType.isNull() && !RetType->isVoidType()) { 11108 CDT = RetType; 11109 } else { 11110 unsigned Offset = 0; 11111 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11112 if (ParamAttrs[Offset].Kind == Vector) 11113 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11114 ++Offset; 11115 } 11116 if (CDT.isNull()) { 11117 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11118 if (ParamAttrs[I + Offset].Kind == Vector) { 11119 CDT = FD->getParamDecl(I)->getType(); 11120 break; 11121 } 11122 } 11123 } 11124 } 11125 if (CDT.isNull()) 11126 CDT = C.IntTy; 11127 CDT = CDT->getCanonicalTypeUnqualified(); 11128 if (CDT->isRecordType() || CDT->isUnionType()) 11129 CDT = C.IntTy; 11130 return C.getTypeSize(CDT); 11131 } 11132 11133 static void 11134 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11135 const llvm::APSInt &VLENVal, 11136 ArrayRef<ParamAttrTy> ParamAttrs, 11137 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11138 struct ISADataTy { 11139 char ISA; 11140 unsigned VecRegSize; 11141 }; 11142 ISADataTy ISAData[] = { 11143 { 11144 'b', 128 11145 }, // SSE 11146 { 11147 'c', 256 11148 }, // AVX 11149 { 11150 'd', 256 11151 }, // AVX2 11152 { 11153 'e', 512 11154 }, // AVX512 11155 }; 11156 llvm::SmallVector<char, 2> Masked; 11157 switch (State) { 11158 case OMPDeclareSimdDeclAttr::BS_Undefined: 11159 Masked.push_back('N'); 11160 Masked.push_back('M'); 11161 break; 11162 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11163 Masked.push_back('N'); 11164 break; 11165 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11166 Masked.push_back('M'); 11167 break; 11168 } 11169 for (char Mask : Masked) { 11170 for (const ISADataTy &Data : ISAData) { 11171 SmallString<256> Buffer; 11172 llvm::raw_svector_ostream Out(Buffer); 11173 Out << "_ZGV" << Data.ISA << Mask; 11174 if (!VLENVal) 
{ 11175 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11176 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11177 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11178 } else { 11179 Out << VLENVal; 11180 } 11181 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11182 switch (ParamAttr.Kind){ 11183 case LinearWithVarStride: 11184 Out << 's' << ParamAttr.StrideOrArg; 11185 break; 11186 case Linear: 11187 Out << 'l'; 11188 if (ParamAttr.StrideOrArg != 1) 11189 Out << ParamAttr.StrideOrArg; 11190 break; 11191 case Uniform: 11192 Out << 'u'; 11193 break; 11194 case Vector: 11195 Out << 'v'; 11196 break; 11197 } 11198 if (!!ParamAttr.Alignment) 11199 Out << 'a' << ParamAttr.Alignment; 11200 } 11201 Out << '_' << Fn->getName(); 11202 Fn->addFnAttr(Out.str()); 11203 } 11204 } 11205 } 11206 11207 // This are the Functions that are needed to mangle the name of the 11208 // vector functions generated by the compiler, according to the rules 11209 // defined in the "Vector Function ABI specifications for AArch64", 11210 // available at 11211 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11212 11213 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11214 /// 11215 /// TODO: Need to implement the behavior for reference marked with a 11216 /// var or no linear modifiers (1.b in the section). For this, we 11217 /// need to extend ParamKindTy to support the linear modifiers. 
11218 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11219 QT = QT.getCanonicalType(); 11220 11221 if (QT->isVoidType()) 11222 return false; 11223 11224 if (Kind == ParamKindTy::Uniform) 11225 return false; 11226 11227 if (Kind == ParamKindTy::Linear) 11228 return false; 11229 11230 // TODO: Handle linear references with modifiers 11231 11232 if (Kind == ParamKindTy::LinearWithVarStride) 11233 return false; 11234 11235 return true; 11236 } 11237 11238 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11239 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11240 QT = QT.getCanonicalType(); 11241 unsigned Size = C.getTypeSize(QT); 11242 11243 // Only scalars and complex within 16 bytes wide set PVB to true. 11244 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11245 return false; 11246 11247 if (QT->isFloatingType()) 11248 return true; 11249 11250 if (QT->isIntegerType()) 11251 return true; 11252 11253 if (QT->isPointerType()) 11254 return true; 11255 11256 // TODO: Add support for complex types (section 3.1.2, item 2). 11257 11258 return false; 11259 } 11260 11261 /// Computes the lane size (LS) of a return type or of an input parameter, 11262 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11263 /// TODO: Add support for references, section 3.2.1, item 1. 11264 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11265 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11266 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11267 if (getAArch64PBV(PTy, C)) 11268 return C.getTypeSize(PTy); 11269 } 11270 if (getAArch64PBV(QT, C)) 11271 return C.getTypeSize(QT); 11272 11273 return C.getTypeSize(C.getUIntPtrType()); 11274 } 11275 11276 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11277 // signature of the scalar function, as defined in 3.2.2 of the 11278 // AAVFABI. 
11279 static std::tuple<unsigned, unsigned, bool> 11280 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11281 QualType RetType = FD->getReturnType().getCanonicalType(); 11282 11283 ASTContext &C = FD->getASTContext(); 11284 11285 bool OutputBecomesInput = false; 11286 11287 llvm::SmallVector<unsigned, 8> Sizes; 11288 if (!RetType->isVoidType()) { 11289 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11290 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11291 OutputBecomesInput = true; 11292 } 11293 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11294 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11295 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11296 } 11297 11298 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11299 // The LS of a function parameter / return value can only be a power 11300 // of 2, starting from 8 bits, up to 128. 11301 assert(std::all_of(Sizes.begin(), Sizes.end(), 11302 [](unsigned Size) { 11303 return Size == 8 || Size == 16 || Size == 32 || 11304 Size == 64 || Size == 128; 11305 }) && 11306 "Invalid size"); 11307 11308 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11309 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11310 OutputBecomesInput); 11311 } 11312 11313 /// Mangle the parameter part of the vector function name according to 11314 /// their OpenMP classification. The mangling function is defined in 11315 /// section 3.5 of the AAVFABI. 11316 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11317 SmallString<256> Buffer; 11318 llvm::raw_svector_ostream Out(Buffer); 11319 for (const auto &ParamAttr : ParamAttrs) { 11320 switch (ParamAttr.Kind) { 11321 case LinearWithVarStride: 11322 Out << "ls" << ParamAttr.StrideOrArg; 11323 break; 11324 case Linear: 11325 Out << 'l'; 11326 // Don't print the step value if it is not present or if it is 11327 // equal to 1. 
11328 if (ParamAttr.StrideOrArg != 1) 11329 Out << ParamAttr.StrideOrArg; 11330 break; 11331 case Uniform: 11332 Out << 'u'; 11333 break; 11334 case Vector: 11335 Out << 'v'; 11336 break; 11337 } 11338 11339 if (!!ParamAttr.Alignment) 11340 Out << 'a' << ParamAttr.Alignment; 11341 } 11342 11343 return std::string(Out.str()); 11344 } 11345 11346 // Function used to add the attribute. The parameter `VLEN` is 11347 // templated to allow the use of "x" when targeting scalable functions 11348 // for SVE. 11349 template <typename T> 11350 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11351 char ISA, StringRef ParSeq, 11352 StringRef MangledName, bool OutputBecomesInput, 11353 llvm::Function *Fn) { 11354 SmallString<256> Buffer; 11355 llvm::raw_svector_ostream Out(Buffer); 11356 Out << Prefix << ISA << LMask << VLEN; 11357 if (OutputBecomesInput) 11358 Out << "v"; 11359 Out << ParSeq << "_" << MangledName; 11360 Fn->addFnAttr(Out.str()); 11361 } 11362 11363 // Helper function to generate the Advanced SIMD names depending on 11364 // the value of the NDS when simdlen is not present. 
11365 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11366 StringRef Prefix, char ISA, 11367 StringRef ParSeq, StringRef MangledName, 11368 bool OutputBecomesInput, 11369 llvm::Function *Fn) { 11370 switch (NDS) { 11371 case 8: 11372 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11373 OutputBecomesInput, Fn); 11374 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11375 OutputBecomesInput, Fn); 11376 break; 11377 case 16: 11378 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11379 OutputBecomesInput, Fn); 11380 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11381 OutputBecomesInput, Fn); 11382 break; 11383 case 32: 11384 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11385 OutputBecomesInput, Fn); 11386 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11387 OutputBecomesInput, Fn); 11388 break; 11389 case 64: 11390 case 128: 11391 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11392 OutputBecomesInput, Fn); 11393 break; 11394 default: 11395 llvm_unreachable("Scalar type is too wide."); 11396 } 11397 } 11398 11399 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11400 static void emitAArch64DeclareSimdFunction( 11401 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11402 ArrayRef<ParamAttrTy> ParamAttrs, 11403 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11404 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11405 11406 // Get basic data for building the vector signature. 11407 const auto Data = getNDSWDS(FD, ParamAttrs); 11408 const unsigned NDS = std::get<0>(Data); 11409 const unsigned WDS = std::get<1>(Data); 11410 const bool OutputBecomesInput = std::get<2>(Data); 11411 11412 // Check the values provided via `simdlen` by the user. 11413 // 1. 
A `simdlen(1)` doesn't produce vector signatures, 11414 if (UserVLEN == 1) { 11415 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11416 DiagnosticsEngine::Warning, 11417 "The clause simdlen(1) has no effect when targeting aarch64."); 11418 CGM.getDiags().Report(SLoc, DiagID); 11419 return; 11420 } 11421 11422 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11423 // Advanced SIMD output. 11424 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11425 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11426 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11427 "power of 2 when targeting Advanced SIMD."); 11428 CGM.getDiags().Report(SLoc, DiagID); 11429 return; 11430 } 11431 11432 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11433 // limits. 11434 if (ISA == 's' && UserVLEN != 0) { 11435 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11436 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11437 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11438 "lanes in the architectural constraints " 11439 "for SVE (min is 128-bit, max is " 11440 "2048-bit, by steps of 128-bit)"); 11441 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11442 return; 11443 } 11444 } 11445 11446 // Sort out parameter sequence. 11447 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11448 StringRef Prefix = "_ZGV"; 11449 // Generate simdlen from user input (if any). 11450 if (UserVLEN) { 11451 if (ISA == 's') { 11452 // SVE generates only a masked function. 11453 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11454 OutputBecomesInput, Fn); 11455 } else { 11456 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11457 // Advanced SIMD generates one or two functions, depending on 11458 // the `[not]inbranch` clause. 
11459 switch (State) { 11460 case OMPDeclareSimdDeclAttr::BS_Undefined: 11461 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11462 OutputBecomesInput, Fn); 11463 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11464 OutputBecomesInput, Fn); 11465 break; 11466 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11467 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11468 OutputBecomesInput, Fn); 11469 break; 11470 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11471 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11472 OutputBecomesInput, Fn); 11473 break; 11474 } 11475 } 11476 } else { 11477 // If no user simdlen is provided, follow the AAVFABI rules for 11478 // generating the vector length. 11479 if (ISA == 's') { 11480 // SVE, section 3.4.1, item 1. 11481 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11482 OutputBecomesInput, Fn); 11483 } else { 11484 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11485 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11486 // two vector names depending on the use of the clause 11487 // `[not]inbranch`. 
11488 switch (State) { 11489 case OMPDeclareSimdDeclAttr::BS_Undefined: 11490 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11491 OutputBecomesInput, Fn); 11492 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11493 OutputBecomesInput, Fn); 11494 break; 11495 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11496 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11497 OutputBecomesInput, Fn); 11498 break; 11499 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11500 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11501 OutputBecomesInput, Fn); 11502 break; 11503 } 11504 } 11505 } 11506 } 11507 11508 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11509 llvm::Function *Fn) { 11510 ASTContext &C = CGM.getContext(); 11511 FD = FD->getMostRecentDecl(); 11512 // Map params to their positions in function decl. 11513 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11514 if (isa<CXXMethodDecl>(FD)) 11515 ParamPositions.try_emplace(FD, 0); 11516 unsigned ParamPos = ParamPositions.size(); 11517 for (const ParmVarDecl *P : FD->parameters()) { 11518 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11519 ++ParamPos; 11520 } 11521 while (FD) { 11522 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11523 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11524 // Mark uniform parameters. 11525 for (const Expr *E : Attr->uniforms()) { 11526 E = E->IgnoreParenImpCasts(); 11527 unsigned Pos; 11528 if (isa<CXXThisExpr>(E)) { 11529 Pos = ParamPositions[FD]; 11530 } else { 11531 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11532 ->getCanonicalDecl(); 11533 Pos = ParamPositions[PVD]; 11534 } 11535 ParamAttrs[Pos].Kind = Uniform; 11536 } 11537 // Get alignment info. 
11538 auto NI = Attr->alignments_begin(); 11539 for (const Expr *E : Attr->aligneds()) { 11540 E = E->IgnoreParenImpCasts(); 11541 unsigned Pos; 11542 QualType ParmTy; 11543 if (isa<CXXThisExpr>(E)) { 11544 Pos = ParamPositions[FD]; 11545 ParmTy = E->getType(); 11546 } else { 11547 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11548 ->getCanonicalDecl(); 11549 Pos = ParamPositions[PVD]; 11550 ParmTy = PVD->getType(); 11551 } 11552 ParamAttrs[Pos].Alignment = 11553 (*NI) 11554 ? (*NI)->EvaluateKnownConstInt(C) 11555 : llvm::APSInt::getUnsigned( 11556 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11557 .getQuantity()); 11558 ++NI; 11559 } 11560 // Mark linear parameters. 11561 auto SI = Attr->steps_begin(); 11562 auto MI = Attr->modifiers_begin(); 11563 for (const Expr *E : Attr->linears()) { 11564 E = E->IgnoreParenImpCasts(); 11565 unsigned Pos; 11566 // Rescaling factor needed to compute the linear parameter 11567 // value in the mangled name. 11568 unsigned PtrRescalingFactor = 1; 11569 if (isa<CXXThisExpr>(E)) { 11570 Pos = ParamPositions[FD]; 11571 } else { 11572 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11573 ->getCanonicalDecl(); 11574 Pos = ParamPositions[PVD]; 11575 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11576 PtrRescalingFactor = CGM.getContext() 11577 .getTypeSizeInChars(P->getPointeeType()) 11578 .getQuantity(); 11579 } 11580 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11581 ParamAttr.Kind = Linear; 11582 // Assuming a stride of 1, for `linear` without modifiers. 
11583 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11584 if (*SI) { 11585 Expr::EvalResult Result; 11586 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11587 if (const auto *DRE = 11588 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11589 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11590 ParamAttr.Kind = LinearWithVarStride; 11591 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11592 ParamPositions[StridePVD->getCanonicalDecl()]); 11593 } 11594 } 11595 } else { 11596 ParamAttr.StrideOrArg = Result.Val.getInt(); 11597 } 11598 } 11599 // If we are using a linear clause on a pointer, we need to 11600 // rescale the value of linear_step with the byte size of the 11601 // pointee type. 11602 if (Linear == ParamAttr.Kind) 11603 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11604 ++SI; 11605 ++MI; 11606 } 11607 llvm::APSInt VLENVal; 11608 SourceLocation ExprLoc; 11609 const Expr *VLENExpr = Attr->getSimdlen(); 11610 if (VLENExpr) { 11611 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11612 ExprLoc = VLENExpr->getExprLoc(); 11613 } 11614 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11615 if (CGM.getTriple().isX86()) { 11616 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11617 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11618 unsigned VLEN = VLENVal.getExtValue(); 11619 StringRef MangledName = Fn->getName(); 11620 if (CGM.getTarget().hasFeature("sve")) 11621 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11622 MangledName, 's', 128, Fn, ExprLoc); 11623 if (CGM.getTarget().hasFeature("neon")) 11624 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11625 MangledName, 'n', 128, Fn, ExprLoc); 11626 } 11627 } 11628 FD = FD->getPreviousDecl(); 11629 } 11630 } 11631 11632 namespace { 11633 /// Cleanup action for doacross support. 
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments passed to the runtime function called on cleanup.
  static const int DoacrossFinArgs = 2;

private:
  /// Runtime function called when the cleanup is emitted.
  llvm::FunctionCallee RTLFn;
  /// Copy of the call arguments.
  llvm::Value *Args[DoacrossFinArgs];

public:
  /// \param RTLFn    Runtime function to call.
  /// \param CallArgs Exactly DoacrossFinArgs arguments; they are copied.
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  /// Emit the runtime call; nothing is emitted without an insertion point.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit the call to __kmpc_doacross_init for the loop directive \p D and
/// push a cleanup that calls __kmpc_doacross_fini.
///
/// \param CGF           Function being emitted; nothing is done when it has
///                      no insertion point.
/// \param D             Loop directive; its begin / end locations are used
///                      for the init / fini calls respectively.
/// \param NumIterations One iteration-count expression per dimension.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // The record type is built once and cached in KmpDimTy.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim element per entry of NumIterations.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  // The whole array is zeroed first; only the upper and stride fields are
  // stored below, so the lower field keeps the zero written here.
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  // Field indices inside kmp_dim, in declaration order.
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini call as a cleanup for both
  // normal and exceptional exits.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit the runtime call for a doacross `depend` clause:
/// __kmpc_doacross_post for `depend(source)`, __kmpc_doacross_wait for
/// `depend(sink: ...)`.
///
/// The loop data of \p C is converted to 64-bit signed integers and stored
/// in a temporary array whose address is passed to the runtime.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Store one converted value per loop.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  // Arguments: location, thread id, pointer to the first array element.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee with \p Args under an artificial debug location
/// created from \p Loc, which must be valid.
///
/// When the callee is an llvm::Function known not to throw, the call is
/// emitted as a nounwind runtime call.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Emit a call to the outlined function \p OutlinedFn; forwards to
/// emitCall().
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Record that a declare-target function has been emitted: sets
/// HasEmittedDeclareTargetRegion when \p D is a FunctionDecl that is a
/// declare target declaration.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Return the address of the local variable \p NativeParam. \p TargetParam
/// is not used by this implementation.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return the address to use for the local variable \p VD when it needs
/// special handling, or an invalid address otherwise.
///
/// Two cases are handled:
///  - \p VD is registered for the current function in the untied-task maps:
///    the registered address is returned.
///  - the canonical declaration carries an OMPAllocateDeclAttr and
///    isAllocatableDecl() accepts it: storage is obtained with __kmpc_alloc
///    and a cleanup calling __kmpc_free is pushed.
///
/// \returns Address::invalid() when \p VD is null or neither case applies.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // Look up the pair of addresses recorded for VD in the current function,
  // if any.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // The size is only known at run time, so the rounding is emitted as
      // instructions.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Constant size: round up at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    assert(AA->getAllocator() &&
           "Expected allocator expression for non-default allocator.");
    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    Allocator = CGF.EmitScalarConversion(
        Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
        AA->getAllocator()->getExprLoc());
    llvm::Value *Args[] = {ThreadID, Size, Allocator};

    // Call __kmpc_alloc(thread id, size, allocator).
    llvm::Value *Addr =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_alloc),
                            Args, getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    // Cast the returned pointer to a pointer to the variable's type.
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // When an untied-task slot was recorded for VD, store the new pointer
    // into it.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      /// Runtime function called on cleanup.
      llvm::FunctionCallee RTLFn;
      /// Raw encoding of the source location used to obtain the thread id.
      unsigned LocEncoding;
      /// Address passed to the runtime function.
      Address Addr;
      /// Allocator expression, re-evaluated when the cleanup is emitted.
      const Expr *Allocator;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
                           Address Addr, const Expr *Allocator)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            Allocator(Allocator) {}
      /// Emit RTLFn(thread id, address as void *, allocator as void *);
      /// nothing is emitted without an insertion point.
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
        // According to the standard, the original allocator type is an enum
        // (integer). Convert to pointer type, if required.
        AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                            CGF.getContext().VoidPtrTy,
                                            Allocator->getExprLoc());
        Args[2] = AllocVal;

        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the recorded "real" untied-task address when there is one;
    // otherwise use the freshly allocated pointer with the declared
    // alignment.
    Address VDAddr =
        UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, AA->getAllocator());
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}

/// \returns true when \p VD is recorded in the untied-task local variables
/// registered for the function currently emitted by \p CGF.
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

/// Push a set holding the declarations named in the `nontemporal` clauses
/// of \p S onto NontemporalDeclsStack. Nothing is pushed when \p S has no
/// such clause.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      // A reference is either a plain declaration reference or a member of
      // the current class.
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

/// Pop the set pushed by the constructor, if one was pushed.
CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

/// Push \p LocalVars onto UntiedLocalVarsStack and associate its index with
/// the function currently emitted by \p CGF. Nothing is done when
/// \p LocalVars is empty.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
                         std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  // try_emplace leaves an already existing entry for the function untouched.
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

/// Pop the map pushed by the constructor, if one was pushed.
// NOTE(review): the FunctionToUntiedTaskStackMap entry is not removed here;
// presumably it is erased elsewhere - confirm.
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

/// \returns true when \p VD is a member of any set currently on
/// NontemporalDeclsStack.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

/// Collect the declarations used by \p S for which the lastprivate
/// conditional analysis of an enclosing construct must be disabled.
///
/// Candidates are the variables captured by a target / tasking directive
/// and the scalar variables named in private, firstprivate, lastprivate,
/// reduction and linear clauses of \p S. A candidate is added to
/// \p NeedToAddForLPCsAsDisabled when the innermost entry of
/// LastprivateConditionalStack that knows it is not already disabled.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  // The five loops below are identical except for the clause kind: only
  // scalar references that are plain DeclRefExprs are collected.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Walk the stack from the innermost entry outwards and stop at the first
  // entry that knows the declaration.
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

/// Push a LastprivateConditionalData entry for \p S when the OpenMP version
/// is at least 50 and \p S has a `lastprivate(conditional: ...)` clause.
///
/// Every variable of such a clause is mapped to a unique name generated
/// with the "pl_cond" prefix; \p IVLVal and the current function are
/// recorded in the entry.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// Push a disabled LastprivateConditionalData entry listing the
/// declarations reported by tryToDisableInnerAnalysis() for \p S. Nothing
/// is pushed when the OpenMP version is below 50 or no declaration is
/// reported.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Disabled entries carry no unique names.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return
LastprivateConditionalRAII(CGF, S); 12097 } 12098 12099 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12100 if (CGM.getLangOpts().OpenMP < 50) 12101 return; 12102 if (Action == ActionToDo::DisableLastprivateConditional) { 12103 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12104 "Expected list of disabled private vars."); 12105 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12106 } 12107 if (Action == ActionToDo::PushAsLastprivateConditional) { 12108 assert( 12109 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12110 "Expected list of lastprivate conditional vars."); 12111 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12112 } 12113 } 12114 12115 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12116 const VarDecl *VD) { 12117 ASTContext &C = CGM.getContext(); 12118 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12119 if (I == LastprivateConditionalToTypes.end()) 12120 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12121 QualType NewType; 12122 const FieldDecl *VDField; 12123 const FieldDecl *FiredField; 12124 LValue BaseLVal; 12125 auto VI = I->getSecond().find(VD); 12126 if (VI == I->getSecond().end()) { 12127 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12128 RD->startDefinition(); 12129 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12130 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 12131 RD->completeDefinition(); 12132 NewType = C.getRecordType(RD); 12133 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12134 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12135 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12136 } else { 12137 NewType = std::get<0>(VI->getSecond()); 12138 VDField = std::get<1>(VI->getSecond()); 12139 FiredField = std::get<2>(VI->getSecond()); 
12140 BaseLVal = std::get<3>(VI->getSecond()); 12141 } 12142 LValue FiredLVal = 12143 CGF.EmitLValueForField(BaseLVal, FiredField); 12144 CGF.EmitStoreOfScalar( 12145 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12146 FiredLVal); 12147 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12148 } 12149 12150 namespace { 12151 /// Checks if the lastprivate conditional variable is referenced in LHS. 12152 class LastprivateConditionalRefChecker final 12153 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12154 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12155 const Expr *FoundE = nullptr; 12156 const Decl *FoundD = nullptr; 12157 StringRef UniqueDeclName; 12158 LValue IVLVal; 12159 llvm::Function *FoundFn = nullptr; 12160 SourceLocation Loc; 12161 12162 public: 12163 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12164 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12165 llvm::reverse(LPM)) { 12166 auto It = D.DeclToUniqueName.find(E->getDecl()); 12167 if (It == D.DeclToUniqueName.end()) 12168 continue; 12169 if (D.Disabled) 12170 return false; 12171 FoundE = E; 12172 FoundD = E->getDecl()->getCanonicalDecl(); 12173 UniqueDeclName = It->second; 12174 IVLVal = D.IVLVal; 12175 FoundFn = D.Fn; 12176 break; 12177 } 12178 return FoundE == E; 12179 } 12180 bool VisitMemberExpr(const MemberExpr *E) { 12181 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12182 return false; 12183 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12184 llvm::reverse(LPM)) { 12185 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12186 if (It == D.DeclToUniqueName.end()) 12187 continue; 12188 if (D.Disabled) 12189 return false; 12190 FoundE = E; 12191 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12192 UniqueDeclName = It->second; 12193 IVLVal = D.IVLVal; 12194 FoundFn = D.Fn; 12195 break; 12196 } 12197 return FoundE == E; 12198 } 12199 bool VisitStmt(const Stmt *S) { 12200 for (const Stmt 
*Child : S->children()) { 12201 if (!Child) 12202 continue; 12203 if (const auto *E = dyn_cast<Expr>(Child)) 12204 if (!E->isGLValue()) 12205 continue; 12206 if (Visit(Child)) 12207 return true; 12208 } 12209 return false; 12210 } 12211 explicit LastprivateConditionalRefChecker( 12212 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12213 : LPM(LPM) {} 12214 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12215 getFoundData() const { 12216 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12217 } 12218 }; 12219 } // namespace 12220 12221 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12222 LValue IVLVal, 12223 StringRef UniqueDeclName, 12224 LValue LVal, 12225 SourceLocation Loc) { 12226 // Last updated loop counter for the lastprivate conditional var. 12227 // int<xx> last_iv = 0; 12228 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12229 llvm::Constant *LastIV = 12230 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12231 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12232 IVLVal.getAlignment().getAsAlign()); 12233 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12234 12235 // Last value of the lastprivate conditional. 12236 // decltype(priv_a) last_a; 12237 llvm::Constant *Last = getOrCreateInternalVariable( 12238 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12239 cast<llvm::GlobalVariable>(Last)->setAlignment( 12240 LVal.getAlignment().getAsAlign()); 12241 LValue LastLVal = 12242 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12243 12244 // Global loop counter. Required to handle inner parallel-for regions. 
12245 // iv 12246 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12247 12248 // #pragma omp critical(a) 12249 // if (last_iv <= iv) { 12250 // last_iv = iv; 12251 // last_a = priv_a; 12252 // } 12253 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12254 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12255 Action.Enter(CGF); 12256 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12257 // (last_iv <= iv) ? Check if the variable is updated and store new 12258 // value in global var. 12259 llvm::Value *CmpRes; 12260 if (IVLVal.getType()->isSignedIntegerType()) { 12261 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12262 } else { 12263 assert(IVLVal.getType()->isUnsignedIntegerType() && 12264 "Loop iteration variable must be integer."); 12265 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12266 } 12267 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12268 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12269 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12270 // { 12271 CGF.EmitBlock(ThenBB); 12272 12273 // last_iv = iv; 12274 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12275 12276 // last_a = priv_a; 12277 switch (CGF.getEvaluationKind(LVal.getType())) { 12278 case TEK_Scalar: { 12279 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12280 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12281 break; 12282 } 12283 case TEK_Complex: { 12284 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12285 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12286 break; 12287 } 12288 case TEK_Aggregate: 12289 llvm_unreachable( 12290 "Aggregates are not supported in lastprivate conditional."); 12291 } 12292 // } 12293 CGF.EmitBranch(ExitBB); 12294 // There is no need to emit line number for unconditional branch. 
12295 (void)ApplyDebugLocation::CreateEmpty(CGF); 12296 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12297 }; 12298 12299 if (CGM.getLangOpts().OpenMPSimd) { 12300 // Do not emit as a critical region as no parallel region could be emitted. 12301 RegionCodeGenTy ThenRCG(CodeGen); 12302 ThenRCG(CGF); 12303 } else { 12304 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12305 } 12306 } 12307 12308 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12309 const Expr *LHS) { 12310 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12311 return; 12312 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12313 if (!Checker.Visit(LHS)) 12314 return; 12315 const Expr *FoundE; 12316 const Decl *FoundD; 12317 StringRef UniqueDeclName; 12318 LValue IVLVal; 12319 llvm::Function *FoundFn; 12320 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12321 Checker.getFoundData(); 12322 if (FoundFn != CGF.CurFn) { 12323 // Special codegen for inner parallel regions. 
12324 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12325 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12326 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12327 "Lastprivate conditional is not found in outer region."); 12328 QualType StructTy = std::get<0>(It->getSecond()); 12329 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12330 LValue PrivLVal = CGF.EmitLValue(FoundE); 12331 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12332 PrivLVal.getAddress(CGF), 12333 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12334 LValue BaseLVal = 12335 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12336 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12337 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12338 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12339 FiredLVal, llvm::AtomicOrdering::Unordered, 12340 /*IsVolatile=*/true, /*isInit=*/false); 12341 return; 12342 } 12343 12344 // Private address of the lastprivate conditional in the current context. 
12345 // priv_a 12346 LValue LVal = CGF.EmitLValue(FoundE); 12347 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12348 FoundE->getExprLoc()); 12349 } 12350 12351 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12352 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12353 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12354 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12355 return; 12356 auto Range = llvm::reverse(LastprivateConditionalStack); 12357 auto It = llvm::find_if( 12358 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12359 if (It == Range.end() || It->Fn != CGF.CurFn) 12360 return; 12361 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12362 assert(LPCI != LastprivateConditionalToTypes.end() && 12363 "Lastprivates must be registered already."); 12364 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12365 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12366 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12367 for (const auto &Pair : It->DeclToUniqueName) { 12368 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12369 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12370 continue; 12371 auto I = LPCI->getSecond().find(Pair.first); 12372 assert(I != LPCI->getSecond().end() && 12373 "Lastprivate must be rehistered already."); 12374 // bool Cmp = priv_a.Fired != 0; 12375 LValue BaseLVal = std::get<3>(I->getSecond()); 12376 LValue FiredLVal = 12377 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12378 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12379 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12380 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12381 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12382 // if (Cmp) { 12383 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12384 CGF.EmitBlock(ThenBB); 
12385 Address Addr = CGF.GetAddrOfLocalVar(VD); 12386 LValue LVal; 12387 if (VD->getType()->isReferenceType()) 12388 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12389 AlignmentSource::Decl); 12390 else 12391 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12392 AlignmentSource::Decl); 12393 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12394 D.getBeginLoc()); 12395 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12396 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12397 // } 12398 } 12399 } 12400 12401 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12402 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12403 SourceLocation Loc) { 12404 if (CGF.getLangOpts().OpenMP < 50) 12405 return; 12406 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12407 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12408 "Unknown lastprivate conditional variable."); 12409 StringRef UniqueName = It->second; 12410 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12411 // The variable was not updated in the region - exit. 
12412 if (!GV) 12413 return; 12414 LValue LPLVal = CGF.MakeAddrLValue( 12415 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12416 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12417 CGF.EmitStoreOfScalar(Res, PrivLVal); 12418 } 12419 12420 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12421 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12422 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12423 llvm_unreachable("Not supported in SIMD-only mode"); 12424 } 12425 12426 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12427 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12428 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12429 llvm_unreachable("Not supported in SIMD-only mode"); 12430 } 12431 12432 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12433 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12434 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12435 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12436 bool Tied, unsigned &NumberOfParts) { 12437 llvm_unreachable("Not supported in SIMD-only mode"); 12438 } 12439 12440 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12441 SourceLocation Loc, 12442 llvm::Function *OutlinedFn, 12443 ArrayRef<llvm::Value *> CapturedVars, 12444 const Expr *IfCond) { 12445 llvm_unreachable("Not supported in SIMD-only mode"); 12446 } 12447 12448 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12449 CodeGenFunction &CGF, StringRef CriticalName, 12450 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12451 const Expr *Hint) { 12452 llvm_unreachable("Not supported in SIMD-only mode"); 12453 } 12454 12455 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12456 const RegionCodeGenTy &MasterOpGen, 12457 SourceLocation Loc) { 12458 llvm_unreachable("Not supported in SIMD-only mode"); 12459 } 12460 12461 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12462 SourceLocation Loc) { 12463 llvm_unreachable("Not supported in SIMD-only mode"); 12464 } 12465 12466 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12467 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12468 SourceLocation Loc) { 12469 llvm_unreachable("Not supported in SIMD-only mode"); 12470 } 12471 12472 void CGOpenMPSIMDRuntime::emitSingleRegion( 12473 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12474 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12475 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12476 ArrayRef<const Expr *> AssignmentOps) { 12477 llvm_unreachable("Not supported in SIMD-only mode"); 12478 } 12479 12480 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12481 const RegionCodeGenTy &OrderedOpGen, 12482 SourceLocation Loc, 12483 bool IsThreads) { 12484 llvm_unreachable("Not supported in SIMD-only mode"); 12485 } 12486 12487 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12488 SourceLocation Loc, 12489 OpenMPDirectiveKind Kind, 12490 bool EmitChecks, 12491 bool ForceSimpleCall) { 12492 llvm_unreachable("Not supported in SIMD-only mode"); 12493 } 12494 12495 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12496 CodeGenFunction &CGF, SourceLocation Loc, 12497 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12498 bool Ordered, const DispatchRTInput &DispatchValues) { 12499 llvm_unreachable("Not supported in SIMD-only mode"); 12500 } 12501 12502 void CGOpenMPSIMDRuntime::emitForStaticInit( 12503 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12504 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12505 llvm_unreachable("Not supported in SIMD-only mode"); 12506 } 12507 12508 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12509 CodeGenFunction &CGF, SourceLocation Loc, 12510 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 12511 llvm_unreachable("Not supported in SIMD-only mode"); 12512 } 12513 12514 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12515 SourceLocation Loc, 12516 unsigned IVSize, 12517 bool IVSigned) { 12518 llvm_unreachable("Not supported in SIMD-only mode"); 12519 } 12520 12521 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12522 SourceLocation Loc, 12523 OpenMPDirectiveKind DKind) { 12524 llvm_unreachable("Not supported in SIMD-only mode"); 12525 } 12526 12527 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12528 SourceLocation Loc, 12529 unsigned IVSize, bool IVSigned, 12530 Address IL, Address LB, 12531 Address UB, Address ST) { 12532 llvm_unreachable("Not supported in SIMD-only mode"); 12533 } 12534 12535 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12536 llvm::Value *NumThreads, 12537 SourceLocation Loc) { 12538 llvm_unreachable("Not supported in SIMD-only mode"); 12539 } 12540 12541 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12542 ProcBindKind ProcBind, 12543 SourceLocation Loc) { 12544 llvm_unreachable("Not supported in SIMD-only mode"); 12545 } 12546 12547 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12548 const VarDecl *VD, 12549 Address VDAddr, 12550 SourceLocation Loc) { 12551 llvm_unreachable("Not supported in SIMD-only mode"); 12552 } 12553 12554 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12555 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12556 CodeGenFunction *CGF) { 12557 llvm_unreachable("Not supported in SIMD-only mode"); 12558 } 12559 12560 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12561 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12562 llvm_unreachable("Not supported in SIMD-only mode"); 12563 } 12564 12565 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12566 ArrayRef<const Expr *> 
Vars, 12567 SourceLocation Loc, 12568 llvm::AtomicOrdering AO) { 12569 llvm_unreachable("Not supported in SIMD-only mode"); 12570 } 12571 12572 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12573 const OMPExecutableDirective &D, 12574 llvm::Function *TaskFunction, 12575 QualType SharedsTy, Address Shareds, 12576 const Expr *IfCond, 12577 const OMPTaskDataTy &Data) { 12578 llvm_unreachable("Not supported in SIMD-only mode"); 12579 } 12580 12581 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12582 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12583 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12584 const Expr *IfCond, const OMPTaskDataTy &Data) { 12585 llvm_unreachable("Not supported in SIMD-only mode"); 12586 } 12587 12588 void CGOpenMPSIMDRuntime::emitReduction( 12589 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12590 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12591 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12592 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12593 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12594 ReductionOps, Options); 12595 } 12596 12597 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12598 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12599 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12600 llvm_unreachable("Not supported in SIMD-only mode"); 12601 } 12602 12603 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12604 SourceLocation Loc, 12605 bool IsWorksharingReduction) { 12606 llvm_unreachable("Not supported in SIMD-only mode"); 12607 } 12608 12609 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12610 SourceLocation Loc, 12611 ReductionCodeGen &RCG, 12612 unsigned N) { 12613 llvm_unreachable("Not supported in SIMD-only mode"); 12614 } 12615 
// CGOpenMPSIMDRuntime entry points (continued).
//
// Each definition below, with the exception of emitTargetGlobal, consists of
// a single llvm_unreachable("Not supported in SIMD-only mode"): reaching one
// of them is treated as an internal error. Parameters are intentionally
// unused.

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Unlike its neighbours this entry point is callable: it always returns false
// and emits nothing.
// NOTE(review): presumably 'false' tells the caller that the declaration was
// not handled here and regular emission should proceed - confirm against the
// base-class contract.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}