//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Stores the region kind, the code generation callback, the directive kind
/// and the cancel flag passed at construction, and exposes them through
/// accessors. Subclasses must provide getThreadIDVariable().
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Creates region info bound to the captured statement \p CS.
  /// \param CS Captured statement forwarded to the base class.
  /// \param RegionKind Kind reported by getRegionKind().
  /// \param CodeGen Code generation callback stored for this region.
  /// \param Kind Directive kind reported by getDirectiveKind().
  /// \param HasCancel Value reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Creates region info that is not bound to a captured statement; the base
  /// class is constructed with the region kind only.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a switching point of an untied task. The default
  /// implementation does nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// \return The region kind passed at construction.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \return The directive kind passed at construction.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \return The cancel flag passed at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: every captured statement info whose kind is
  /// CR_OpenMP is treated as a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region; used by the subclasses' classof() checks.
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback supplied at construction.
  RegionCodeGenTy CodeGen;
  /// Directive kind supplied at construction.
  OpenMPDirectiveKind Kind;
  /// Cancel flag supplied at construction.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
///
/// Region info that is always constructed with the ParallelOutlinedRegion
/// kind and carries the thread id variable and the helper name given by the
/// creator.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null (checked by an assertion).
  /// \param HelperName Name returned by getHelperName().
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind
  /// is ParallelOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name of the capture helper.
  /// NOTE(review): kept as a StringRef, so the referenced string presumably
  /// has to outlive this object - confirm at the construction sites.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that emits the part-id switch used for untied tasks.
  /// It does nothing when constructed with \c Tied == true.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True when the action was constructed with \c Tied == false.
    bool Untied;
    /// Variable whose pointee holds the current part id.
    const VarDecl *PartIDVar;
    /// Code generation sequence run at every switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; created in Enter() for untied tasks only.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// Emits the switch on the loaded part id, registers the first case and
    /// then emits the first switching point.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        // The part id is read through the pointer stored in PartIDVar.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        // The default destination of the switch branches to the return block.
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        // Case 0 jumps to the block that starts here.
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emits one switching point: stores the current number of switch cases
    /// into the part id, runs UntiedCodeGen, branches to the return block and
    /// registers a new case whose block continues at ".untied.next.".
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The stored value is the case count at this moment.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // The new case is keyed by the case count at this moment; presumably
        // this is the same value that was stored above.
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// \return Number of cases registered on the switch.
    /// NOTE(review): dereferences UntiedSwitch, which stays null unless
    /// Enter() ran for an untied task - confirm callers guarantee that.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  /// \param ThreadIDVar Variable holding the global thread id; must not be
  /// null (checked by an assertion).
  /// \param Action Untied-task action; only a reference to it is stored.
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  /// Forwards to the stored untied-task action.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind
  /// is TaskOutlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
///
/// Most queries are forwarded to the enclosing OpenMP region info, if the
/// previously active captured statement info is one.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  /// \param OldCSI Captured statement info that was active before this
  /// region; may be null or a non-OpenMP info, in which case OuterRegionInfo
  /// is null.
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  // Set the value of the context parameter on the outer region.
  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  /// \return The outer region's 'this' capture field, or null when there is
  /// no outer OpenMP region.
  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // The local below shadows the OuterRegionInfo member: it is the raw
    // previous info returned by getOldCSI(), not the dyn_cast'ed member.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  /// Forwards to the outer OpenMP region, if any; otherwise does nothing.
  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// \return The captured statement info that was passed at construction.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind
  /// is InlinedRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI viewed as an OpenMP region info; null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Always uses the TargetRegion kind, the OMPD_target directive kind and
  /// HasCancel == false.
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  /// LLVM-style RTTI support: matches OpenMP region infos whose region kind
  /// is TargetRegion.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Name of the capture helper, as supplied by the client.
  StringRef HelperName;
};

/// Placeholder code generation callback for expression regions; it must
/// never be invoked.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      // Only variable captures (by reference or by copy) are of interest.
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      // Local variables and parameters need no privatization here.
      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      // NOTE(review): the callback captures the loop-local DRE by reference;
      // presumably addPrivate() invokes it before DRE goes out of scope -
      // confirm.
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  /// Returns whatever CGOpenMPInlinedRegionInfo::lookup() finds, or null.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  /// LLVM-style RTTI support: never matches, so isa/dyn_cast to this type
  /// always fails.
  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
///
/// The constructor installs a new CGOpenMPInlinedRegionInfo on the
/// CodeGenFunction and stashes its lambda capture fields, lambda 'this'
/// capture field and block info; the destructor deletes the installed info
/// and restores everything.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Holds CGF's lambda capture fields while the region is active.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved CGF.LambdaThisCaptureField.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved CGF.BlockInfo.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    // Swapping with the member map moves CGF's entries into this object.
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerators are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

/// Reserved device id values.
enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule; an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers of the runtime library entry points referenced by this file.
/// Each enumerator is preceded by the prototype of the function it denotes.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  OMPRTL__kmpc_taskred_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
  // is_ws, int num, void *data);
  OMPRTL__kmpc_taskred_modifier_init,
  // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  OMPRTL__kmpc_task_reduction_modifier_fini,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,
  // Call to omp_allocator_handle_t __kmpc_init_allocator(int gtid,
  // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]);
  OMPRTL__kmpc_init_allocator,
  // Call to void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
  OMPRTL__kmpc_destroy_allocator,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
  // Call to kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
  // int gtid, kmp_task_t *task);
  OMPRTL__kmpc_task_allow_completion_event,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
///
/// When emitted as a cleanup it calls Exit() on the wrapped action, unless
/// the function currently has no insertion point.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() is invoked; not owned by the cleanup.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing can be emitted without an insertion point.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Runs the stored callback inside its own cleanups scope. If a pre/post
/// action is attached, a CleanupTy for it is pushed first (so its Exit() is
/// emitted when the scope is left) and the action is passed to the callback;
/// otherwise a default-constructed PrePostActionTy is passed.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
813 static const OMPDeclareReductionDecl * 814 getReductionInit(const Expr *ReductionOp) { 815 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 816 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 817 if (const auto *DRE = 818 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 819 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 820 return DRD; 821 return nullptr; 822 } 823 824 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 825 const OMPDeclareReductionDecl *DRD, 826 const Expr *InitOp, 827 Address Private, Address Original, 828 QualType Ty) { 829 if (DRD->getInitializer()) { 830 std::pair<llvm::Function *, llvm::Function *> Reduction = 831 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 832 const auto *CE = cast<CallExpr>(InitOp); 833 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 834 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 835 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 836 const auto *LHSDRE = 837 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 838 const auto *RHSDRE = 839 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 840 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 841 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 842 [=]() { return Private; }); 843 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 844 [=]() { return Original; }); 845 (void)PrivateScope.Privatize(); 846 RValue Func = RValue::get(Reduction.second); 847 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 848 CGF.EmitIgnoredExpr(InitOp); 849 } else { 850 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 851 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 852 auto *GV = new llvm::GlobalVariable( 853 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 854 llvm::GlobalValue::PrivateLinkage, Init, Name); 855 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 856 RValue InitRVal; 
// Read the initializer back from LV in the form matching how Ty is evaluated.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      // Aggregates are passed around by address, so no load is emitted.
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Wrap the value in a temporary opaque expression so it can be stored
    // into the private copy through the generic expression emitter.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit element-by-element initialization of an array.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is emitted directly
/// into every element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, or null. When non-null,
/// \p SrcAddr is walked in lock-step with \p DestAddr.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // NOTE(review): this casts DestAddr to its own element type, which looks
  // like a no-op - confirm whether the base element type was intended.
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // The source pointer is only tracked when a UDR declaration is present.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
// Create the loop blocks and branch straight to the exit for an empty array.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI for the current source element; only created when a UDR declaration
  // is present, otherwise the source address stays invalid.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  // PHI for the current destination element; its second incoming value is
  // added once the loop latch has been emitted.
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element inside its own cleanups
  // scope, so cleanups created by the initializer run on every iteration.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the source GEP reuses the "omp.arraycpy.dest.element"
    // value name of the destination GEP emitted further down.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
953 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 954 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 955 // Check whether we've reached the end. 956 llvm::Value *Done = 957 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 958 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 959 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 960 961 // Done. 962 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 963 } 964 965 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 966 return CGF.EmitOMPSharedLValue(E); 967 } 968 969 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 970 const Expr *E) { 971 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 972 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 973 return LValue(); 974 } 975 976 void ReductionCodeGen::emitAggregateInitialization( 977 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 978 const OMPDeclareReductionDecl *DRD) { 979 // Emit VarDecl with copy init for arrays. 980 // Get the address of the original variable captured in current 981 // captured region. 982 const auto *PrivateVD = 983 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 984 bool EmitDeclareReductionInit = 985 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 986 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 987 EmitDeclareReductionInit, 988 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 989 : PrivateVD->getInit(), 990 DRD, SharedLVal.getAddress(CGF)); 991 } 992 993 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 994 ArrayRef<const Expr *> Origs, 995 ArrayRef<const Expr *> Privates, 996 ArrayRef<const Expr *> ReductionOps) { 997 ClausesData.reserve(Shareds.size()); 998 SharedAddresses.reserve(Shareds.size()); 999 Sizes.reserve(Shareds.size()); 1000 BaseDecls.reserve(Shareds.size()); 1001 const auto *IOrig = Origs.begin(); 1002 const auto *IPriv = Privates.begin(); 1003 const auto *IRed = ReductionOps.begin(); 1004 for (const Expr *Ref : Shareds) { 1005 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 1006 std::advance(IOrig, 1); 1007 std::advance(IPriv, 1); 1008 std::advance(IRed, 1); 1009 } 1010 } 1011 1012 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 1013 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 1014 "Number of generated lvalues must be exactly N."); 1015 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 1016 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 1017 SharedAddresses.emplace_back(First, Second); 1018 if (ClausesData[N].Shared == ClausesData[N].Ref) { 1019 OrigAddresses.emplace_back(First, Second); 1020 } else { 1021 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 1022 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 1023 OrigAddresses.emplace_back(First, Second); 1024 } 1025 } 1026 1027 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 1028 const auto *PrivateVD = 1029 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1030 QualType PrivateType = PrivateVD->getType(); 1031 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1032 if (!PrivateType->isVariablyModifiedType()) { 1033 Sizes.emplace_back( 1034 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 1035 nullptr); 1036 return; 1037 } 1038 
// Variably modified type: compute the element count and the size in chars.
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (upper bound - lower bound) + 1; size = count * sizeof.
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole object: take the type size and derive the element count from it.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the opaque size expression of the private array type to the computed
  // element count before emitting the variably modified type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the variably modified private type of reduction item \p N using an
/// element count \p Size supplied by the caller. For items whose private type
/// is not variably modified nothing is emitted and \p Size must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the initialization of the private copy of reduction item \p N.
///
/// Three strategies are tried in order: array types go through
/// emitAggregateInitialization; items with a user-defined reduction that has
/// an initializer (or whose private variable has no initializer) go through
/// emitInitWithReductionInitializer; otherwise, if \p DefaultInit returns
/// false and the private variable has a non-trivial initializer, that
/// initializer is emitted into \p PrivateAddr.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Retype both addresses to the memory types of the private and shared items.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private copy of reduction item \p N has a type that
/// requires destruction.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the private copy of reduction item \p N located
/// at \p PrivateAddr, if its type requires destruction.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Starting from \p BaseLV of type \p BaseTy, load through pointer and
/// reference levels until the type matches \p ElTy (or is no longer a pointer
/// or reference), then return the resulting lvalue with its address retyped
/// to the memory type of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Counterpart of loadToBegin: wrap \p Addr in one memory temporary per
/// pointer/reference level between \p BaseTy and \p ElTy, chaining each
/// temporary into the previous one, and return the outermost temporary. When
/// no level has to be rebuilt, \p Addr is cast to \p BaseLVType and returned
/// with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    // Link the new temporary into the previous level, or remember it as the
    // outermost one if it is the first.
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
CGF.Builder.CreateStore(Addr, Tmp); 1178 return MostTopTmp; 1179 } 1180 return Address(Addr, BaseLVAlignment); 1181 } 1182 1183 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1184 const VarDecl *OrigVD = nullptr; 1185 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1186 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1187 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1188 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1189 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1190 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1191 DE = cast<DeclRefExpr>(Base); 1192 OrigVD = cast<VarDecl>(DE->getDecl()); 1193 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1194 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1195 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1196 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1197 DE = cast<DeclRefExpr>(Base); 1198 OrigVD = cast<VarDecl>(DE->getDecl()); 1199 } 1200 return OrigVD; 1201 } 1202 1203 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1204 Address PrivateAddr) { 1205 const DeclRefExpr *DE; 1206 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1207 BaseDecls.emplace_back(OrigVD); 1208 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1209 LValue BaseLValue = 1210 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1211 OriginalBaseLValue); 1212 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1213 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1214 llvm::Value *PrivatePointer = 1215 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1216 PrivateAddr.getPointer(), 1217 SharedAddresses[N].first.getAddress(CGF).getType()); 1218 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1219 return castToBase(CGF, OrigVD->getType(), 1220 SharedAddresses[N].first.getType(), 1221 
OriginalBaseLValue.getAddress(CGF).getType(), 1222 OriginalBaseLValue.getAlignment(), Ptr); 1223 } 1224 BaseDecls.emplace_back( 1225 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1226 return PrivateAddr; 1227 } 1228 1229 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1230 const OMPDeclareReductionDecl *DRD = 1231 getReductionInit(ClausesData[N].ReductionOp); 1232 return DRD && DRD->getInitializer(); 1233 } 1234 1235 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1236 return CGF.EmitLoadOfPointerLValue( 1237 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1238 getThreadIDVariable()->getType()->castAs<PointerType>()); 1239 } 1240 1241 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1242 if (!CGF.HaveInsertPoint()) 1243 return; 1244 // 1.2.2 OpenMP Language Terminology 1245 // Structured block - An executable statement with a single entry at the 1246 // top and a single exit at the bottom. 1247 // The point of exit cannot be a branch out of the structured block. 1248 // longjmp() and throw() must not violate the entry/exit criteria. 
1249 CGF.EHStack.pushTerminate(); 1250 CodeGen(CGF); 1251 CGF.EHStack.popTerminate(); 1252 } 1253 1254 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1255 CodeGenFunction &CGF) { 1256 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1257 getThreadIDVariable()->getType(), 1258 AlignmentSource::Decl); 1259 } 1260 1261 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1262 QualType FieldTy) { 1263 auto *Field = FieldDecl::Create( 1264 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1265 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1266 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1267 Field->setAccess(AS_public); 1268 DC->addDecl(Field); 1269 return Field; 1270 } 1271 1272 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1273 StringRef Separator) 1274 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1275 OffloadEntriesInfoManager(CGM) { 1276 ASTContext &C = CGM.getContext(); 1277 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1278 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1279 RD->startDefinition(); 1280 // reserved_1 1281 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1282 // flags 1283 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1284 // reserved_2 1285 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1286 // reserved_3 1287 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1288 // psource 1289 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1290 RD->completeDefinition(); 1291 IdentQTy = C.getRecordType(RD); 1292 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1293 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1294 1295 loadOffloadInfoMetadata(); 1296 } 1297 1298 void CGOpenMPRuntime::clear() { 1299 InternalVars.clear(); 1300 // Clean non-target variable declarations possibly used only in debug info. 
// Erase only globals that are still alive, are declarations and have no uses.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Build a runtime symbol name by concatenating \p Parts, prefixing the first
/// part with FirstSeparator and every following part with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit the internal helper function for the combiner or the initializer of a
/// user-defined reduction over type \p Ty.
///
/// The function takes two restrict-qualified pointers to \p Ty, the first
/// backing \p Out and the second backing \p In. Inside the function the
/// variables \p In and \p Out are remapped to the pointees of the matching
/// parameters. For initializers (\p IsCombiner false) a non-trivial
/// initializer of \p Out is emitted first; \p CombinerInitializer, if
/// non-null, is then emitted for its side effects.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *out, Ty *in);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // The "out" parameter comes first, the "in" parameter second.
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // In optimized builds force the helper to be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner and, if \p D has an initializer, the initializer helper
/// for the user-defined reduction \p D and cache both in UDRMap. Does nothing
/// if \p D has already been emitted. When \p CGF is non-null, \p D is also
/// recorded against the function currently being emitted.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers are emitted as an expression; for other
    // kinds the helper relies on the initializer of the private variable.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Return the (combiner, initializer) function pair for \p D, emitting the
/// helpers on first request.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
1436 // The FiniCB will still be needed but at the point where the 1437 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1438 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1439 assert(IP.getBlock()->end() == IP.getPoint() && 1440 "Clang CG should cause non-terminated block!"); 1441 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1442 CGF.Builder.restoreIP(IP); 1443 CodeGenFunction::JumpDest Dest = 1444 CGF.getOMPCancelDestination(OMPD_parallel); 1445 CGF.EmitBranchThroughCleanup(Dest); 1446 }; 1447 1448 // TODO: Remove this once we emit parallel regions through the 1449 // OpenMPIRBuilder as it can do this setup internally. 1450 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1451 {FiniCB, OMPD_parallel, HasCancel}); 1452 OMPBuilder->pushFinalizationCB(std::move(FI)); 1453 } 1454 ~PushAndPopStackRAII() { 1455 if (OMPBuilder) 1456 OMPBuilder->popFinalizationCB(); 1457 } 1458 llvm::OpenMPIRBuilder *OMPBuilder; 1459 }; 1460 } // namespace 1461 1462 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1463 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1464 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1465 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1466 assert(ThreadIDVar->getType()->isPointerType() && 1467 "thread id variable must be of type kmp_int32 *"); 1468 CodeGenFunction CGF(CGM, true); 1469 bool HasCancel = false; 1470 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1471 HasCancel = OPD->hasCancel(); 1472 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1473 HasCancel = OPD->hasCancel(); 1474 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1475 HasCancel = OPSD->hasCancel(); 1476 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1477 HasCancel = OPFD->hasCancel(); 1478 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1479 HasCancel = 
OPFD->hasCancel(); 1480 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1481 HasCancel = OPFD->hasCancel(); 1482 else if (const auto *OPFD = 1483 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1484 HasCancel = OPFD->hasCancel(); 1485 else if (const auto *OPFD = 1486 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1487 HasCancel = OPFD->hasCancel(); 1488 1489 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1490 // parallel region to make cancellation barriers work properly. 1491 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1492 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1493 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1494 HasCancel, OutlinedHelperName); 1495 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1496 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1497 } 1498 1499 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1500 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1501 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1502 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1503 return emitParallelOrTeamsOutlinedFunction( 1504 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1505 } 1506 1507 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1508 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1509 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1510 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1511 return emitParallelOrTeamsOutlinedFunction( 1512 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1513 } 1514 1515 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1516 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1517 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1518 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen, 1519 bool Tied, unsigned &NumberOfParts) { 1520 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1521 PrePostActionTy &) { 1522 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1523 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1524 llvm::Value *TaskArgs[] = { 1525 UpLoc, ThreadID, 1526 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1527 TaskTVar->getType()->castAs<PointerType>()) 1528 .getPointer(CGF)}; 1529 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1530 }; 1531 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1532 UntiedCodeGen); 1533 CodeGen.setAction(Action); 1534 assert(!ThreadIDVar->getType()->isPointerType() && 1535 "thread id variable must be of type kmp_int32 for tasks"); 1536 const OpenMPDirectiveKind Region = 1537 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1538 : OMPD_task; 1539 const CapturedStmt *CS = D.getCapturedStmt(Region); 1540 bool HasCancel = false; 1541 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1542 HasCancel = TD->hasCancel(); 1543 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1544 HasCancel = TD->hasCancel(); 1545 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1546 HasCancel = TD->hasCancel(); 1547 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1548 HasCancel = TD->hasCancel(); 1549 1550 CodeGenFunction CGF(CGM, true); 1551 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1552 InnermostKind, HasCancel, Action); 1553 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1554 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1555 if (!Tied) 1556 NumberOfParts = Action.getNumberOfParts(); 1557 return Res; 1558 } 1559 1560 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1561 const RecordDecl *RD, const CGRecordLayout &RL, 1562 
ArrayRef<llvm::Constant *> Data) { 1563 llvm::StructType *StructTy = RL.getLLVMType(); 1564 unsigned PrevIdx = 0; 1565 ConstantInitBuilder CIBuilder(CGM); 1566 auto DI = Data.begin(); 1567 for (const FieldDecl *FD : RD->fields()) { 1568 unsigned Idx = RL.getLLVMFieldNo(FD); 1569 // Fill the alignment. 1570 for (unsigned I = PrevIdx; I < Idx; ++I) 1571 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1572 PrevIdx = Idx + 1; 1573 Fields.add(*DI); 1574 ++DI; 1575 } 1576 } 1577 1578 template <class... As> 1579 static llvm::GlobalVariable * 1580 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1581 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1582 As &&... Args) { 1583 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1584 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1585 ConstantInitBuilder CIBuilder(CGM); 1586 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1587 buildStructValue(Fields, CGM, RD, RL, Data); 1588 return Fields.finishAndCreateGlobal( 1589 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1590 std::forward<As>(Args)...); 1591 } 1592 1593 template <typename T> 1594 static void 1595 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1596 ArrayRef<llvm::Constant *> Data, 1597 T &Parent) { 1598 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1599 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1600 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1601 buildStructValue(Fields, CGM, RD, RL, Data); 1602 Fields.finishAndAddTo(Parent); 1603 } 1604 1605 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1606 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1607 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1608 FlagsTy FlagsKey(Flags, Reserved2Flags); 1609 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1610 if (!Entry) { 
1611 if (!DefaultOpenMPPSource) { 1612 // Initialize default location for psource field of ident_t structure of 1613 // all ident_t objects. Format is ";file;function;line;column;;". 1614 // Taken from 1615 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1616 DefaultOpenMPPSource = 1617 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1618 DefaultOpenMPPSource = 1619 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1620 } 1621 1622 llvm::Constant *Data[] = { 1623 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1624 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1625 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1626 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1627 llvm::GlobalValue *DefaultOpenMPLocation = 1628 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1629 llvm::GlobalValue::PrivateLinkage); 1630 DefaultOpenMPLocation->setUnnamedAddr( 1631 llvm::GlobalValue::UnnamedAddr::Global); 1632 1633 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1634 } 1635 return Address(Entry, Align); 1636 } 1637 1638 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1639 bool AtCurrentPoint) { 1640 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1641 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1642 1643 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1644 if (AtCurrentPoint) { 1645 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1646 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1647 } else { 1648 Elem.second.ServiceInsertPt = 1649 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1650 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1651 } 1652 } 1653 1654 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1655 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1656 if (Elem.second.ServiceInsertPt) { 1657 
llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1658 Elem.second.ServiceInsertPt = nullptr; 1659 Ptr->eraseFromParent(); 1660 } 1661 } 1662 1663 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1664 SourceLocation Loc, 1665 unsigned Flags) { 1666 Flags |= OMP_IDENT_KMPC; 1667 // If no debug info is generated - return global default location. 1668 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1669 Loc.isInvalid()) 1670 return getOrCreateDefaultLocation(Flags).getPointer(); 1671 1672 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1673 1674 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1675 Address LocValue = Address::invalid(); 1676 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1677 if (I != OpenMPLocThreadIDMap.end()) 1678 LocValue = Address(I->second.DebugLoc, Align); 1679 1680 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1681 // GetOpenMPThreadID was called before this routine. 
1682 if (!LocValue.isValid()) { 1683 // Generate "ident_t .kmpc_loc.addr;" 1684 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1685 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1686 Elem.second.DebugLoc = AI.getPointer(); 1687 LocValue = AI; 1688 1689 if (!Elem.second.ServiceInsertPt) 1690 setLocThreadIdInsertPt(CGF); 1691 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1692 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1693 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1694 CGF.getTypeSize(IdentQTy)); 1695 } 1696 1697 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1698 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1699 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1700 LValue PSource = 1701 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1702 1703 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1704 if (OMPDebugLoc == nullptr) { 1705 SmallString<128> Buffer2; 1706 llvm::raw_svector_ostream OS2(Buffer2); 1707 // Build debug location 1708 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1709 OS2 << ";" << PLoc.getFilename() << ";"; 1710 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1711 OS2 << FD->getQualifiedNameAsString(); 1712 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1713 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1714 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1715 } 1716 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1717 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1718 1719 // Our callers always pass this to a runtime function, so for 1720 // convenience, go ahead and return a naked pointer. 
1721 return LocValue.getPointer(); 1722 } 1723 1724 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1725 SourceLocation Loc) { 1726 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1727 1728 llvm::Value *ThreadID = nullptr; 1729 // Check whether we've already cached a load of the thread id in this 1730 // function. 1731 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1732 if (I != OpenMPLocThreadIDMap.end()) { 1733 ThreadID = I->second.ThreadID; 1734 if (ThreadID != nullptr) 1735 return ThreadID; 1736 } 1737 // If exceptions are enabled, do not use parameter to avoid possible crash. 1738 if (auto *OMPRegionInfo = 1739 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1740 if (OMPRegionInfo->getThreadIDVariable()) { 1741 // Check if this an outlined function with thread id passed as argument. 1742 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1743 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1744 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1745 !CGF.getLangOpts().CXXExceptions || 1746 CGF.Builder.GetInsertBlock() == TopBlock || 1747 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1748 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1749 TopBlock || 1750 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1751 CGF.Builder.GetInsertBlock()) { 1752 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1753 // If value loaded in entry block, cache it and use it everywhere in 1754 // function. 1755 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1756 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1757 Elem.second.ThreadID = ThreadID; 1758 } 1759 return ThreadID; 1760 } 1761 } 1762 } 1763 1764 // This is not an outlined function region - need to call __kmpc_int32 1765 // kmpc_global_thread_num(ident_t *loc). 1766 // Generate thread id value and cache this value for use across the 1767 // function. 
1768 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1769 if (!Elem.second.ServiceInsertPt) 1770 setLocThreadIdInsertPt(CGF); 1771 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1772 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1773 llvm::CallInst *Call = CGF.Builder.CreateCall( 1774 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1775 emitUpdateLocation(CGF, Loc)); 1776 Call->setCallingConv(CGF.getRuntimeCC()); 1777 Elem.second.ThreadID = Call; 1778 return Call; 1779 } 1780 1781 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1782 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1783 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1784 clearLocThreadIdInsertPt(CGF); 1785 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1786 } 1787 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1788 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1789 UDRMap.erase(D); 1790 FunctionUDRMap.erase(CGF.CurFn); 1791 } 1792 auto I = FunctionUDMMap.find(CGF.CurFn); 1793 if (I != FunctionUDMMap.end()) { 1794 for(const auto *D : I->second) 1795 UDMMap.erase(D); 1796 FunctionUDMMap.erase(I); 1797 } 1798 LastprivateConditionalToTypes.erase(CGF.CurFn); 1799 } 1800 1801 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1802 return IdentTy->getPointerTo(); 1803 } 1804 1805 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1806 if (!Kmpc_MicroTy) { 1807 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1808 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1809 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1810 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1811 } 1812 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1813 } 1814 1815 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1816 llvm::FunctionCallee RTLFn = nullptr; 1817 switch (static_cast<OpenMPRTLFunction>(Function)) { 1818 case OMPRTL__kmpc_fork_call: { 1819 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1820 // microtask, ...); 1821 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1822 getKmpc_MicroPointerTy()}; 1823 auto *FnTy = 1824 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1825 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1826 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1827 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1828 llvm::LLVMContext &Ctx = F->getContext(); 1829 llvm::MDBuilder MDB(Ctx); 1830 // Annotate the callback behavior of the __kmpc_fork_call: 1831 // - The callback callee is argument number 2 (microtask). 1832 // - The first two arguments of the callback callee are unknown (-1). 1833 // - All variadic arguments to the __kmpc_fork_call are passed to the 1834 // callback callee. 
1835 F->addMetadata( 1836 llvm::LLVMContext::MD_callback, 1837 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1838 2, {-1, -1}, 1839 /* VarArgsArePassed */ true)})); 1840 } 1841 } 1842 break; 1843 } 1844 case OMPRTL__kmpc_global_thread_num: { 1845 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1846 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1847 auto *FnTy = 1848 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1849 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1850 break; 1851 } 1852 case OMPRTL__kmpc_threadprivate_cached: { 1853 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1854 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1855 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1856 CGM.VoidPtrTy, CGM.SizeTy, 1857 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1858 auto *FnTy = 1859 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1860 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1861 break; 1862 } 1863 case OMPRTL__kmpc_critical: { 1864 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1865 // kmp_critical_name *crit); 1866 llvm::Type *TypeParams[] = { 1867 getIdentTyPointerTy(), CGM.Int32Ty, 1868 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1869 auto *FnTy = 1870 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1871 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1872 break; 1873 } 1874 case OMPRTL__kmpc_critical_with_hint: { 1875 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1876 // kmp_critical_name *crit, uintptr_t hint); 1877 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1878 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1879 CGM.IntPtrTy}; 1880 auto *FnTy = 1881 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1882 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1883 break; 1884 } 1885 case OMPRTL__kmpc_threadprivate_register: { 1886 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1887 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1888 // typedef void *(*kmpc_ctor)(void *); 1889 auto *KmpcCtorTy = 1890 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1891 /*isVarArg*/ false)->getPointerTo(); 1892 // typedef void *(*kmpc_cctor)(void *, void *); 1893 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1894 auto *KmpcCopyCtorTy = 1895 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1896 /*isVarArg*/ false) 1897 ->getPointerTo(); 1898 // typedef void (*kmpc_dtor)(void *); 1899 auto *KmpcDtorTy = 1900 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1901 ->getPointerTo(); 1902 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1903 KmpcCopyCtorTy, KmpcDtorTy}; 1904 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1905 /*isVarArg*/ false); 1906 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1907 break; 1908 } 1909 case OMPRTL__kmpc_end_critical: { 1910 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1911 // kmp_critical_name *crit); 1912 llvm::Type *TypeParams[] = { 1913 getIdentTyPointerTy(), CGM.Int32Ty, 1914 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1915 auto *FnTy = 1916 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1917 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1918 break; 1919 } 1920 case OMPRTL__kmpc_cancel_barrier: { 1921 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1922 // global_tid); 1923 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1924 auto *FnTy = 1925 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1926 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1927 break; 1928 } 1929 case 
OMPRTL__kmpc_barrier: { 1930 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1931 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1932 auto *FnTy = 1933 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1934 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1935 break; 1936 } 1937 case OMPRTL__kmpc_for_static_fini: { 1938 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1939 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1940 auto *FnTy = 1941 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1942 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1943 break; 1944 } 1945 case OMPRTL__kmpc_push_num_threads: { 1946 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1947 // kmp_int32 num_threads) 1948 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1949 CGM.Int32Ty}; 1950 auto *FnTy = 1951 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1952 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1953 break; 1954 } 1955 case OMPRTL__kmpc_serialized_parallel: { 1956 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1957 // global_tid); 1958 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1959 auto *FnTy = 1960 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1961 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1962 break; 1963 } 1964 case OMPRTL__kmpc_end_serialized_parallel: { 1965 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1966 // global_tid); 1967 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1968 auto *FnTy = 1969 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1970 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1971 break; 1972 } 1973 case OMPRTL__kmpc_flush: { 1974 // Build void 
__kmpc_flush(ident_t *loc); 1975 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1976 auto *FnTy = 1977 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1978 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1979 break; 1980 } 1981 case OMPRTL__kmpc_master: { 1982 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1983 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1984 auto *FnTy = 1985 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1986 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1987 break; 1988 } 1989 case OMPRTL__kmpc_end_master: { 1990 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1991 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1992 auto *FnTy = 1993 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1994 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1995 break; 1996 } 1997 case OMPRTL__kmpc_omp_taskyield: { 1998 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1999 // int end_part); 2000 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2001 auto *FnTy = 2002 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2003 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 2004 break; 2005 } 2006 case OMPRTL__kmpc_single: { 2007 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 2008 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2009 auto *FnTy = 2010 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2011 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 2012 break; 2013 } 2014 case OMPRTL__kmpc_end_single: { 2015 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 2016 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2017 auto *FnTy = 2018 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2019 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 2020 break; 2021 } 2022 case OMPRTL__kmpc_omp_task_alloc: { 2023 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2024 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2025 // kmp_routine_entry_t *task_entry); 2026 assert(KmpRoutineEntryPtrTy != nullptr && 2027 "Type kmp_routine_entry_t must be created."); 2028 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2029 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2030 // Return void * and then cast to particular kmp_task_t type. 2031 auto *FnTy = 2032 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2033 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2034 break; 2035 } 2036 case OMPRTL__kmpc_omp_target_task_alloc: { 2037 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2038 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2039 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2040 assert(KmpRoutineEntryPtrTy != nullptr && 2041 "Type kmp_routine_entry_t must be created."); 2042 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2043 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2044 CGM.Int64Ty}; 2045 // Return void * and then cast to particular kmp_task_t type. 
2046 auto *FnTy = 2047 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2048 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2049 break; 2050 } 2051 case OMPRTL__kmpc_omp_task: { 2052 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2053 // *new_task); 2054 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2055 CGM.VoidPtrTy}; 2056 auto *FnTy = 2057 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2058 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2059 break; 2060 } 2061 case OMPRTL__kmpc_copyprivate: { 2062 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2063 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2064 // kmp_int32 didit); 2065 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2066 auto *CpyFnTy = 2067 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2068 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2069 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2070 CGM.Int32Ty}; 2071 auto *FnTy = 2072 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2073 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2074 break; 2075 } 2076 case OMPRTL__kmpc_reduce: { 2077 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2078 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2079 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2080 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2081 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2082 /*isVarArg=*/false); 2083 llvm::Type *TypeParams[] = { 2084 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2085 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2086 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2087 auto *FnTy = 2088 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2089 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2090 break; 2091 } 2092 case OMPRTL__kmpc_reduce_nowait: { 2093 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2094 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2095 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2096 // *lck); 2097 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2098 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2099 /*isVarArg=*/false); 2100 llvm::Type *TypeParams[] = { 2101 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2102 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2103 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2104 auto *FnTy = 2105 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2106 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2107 break; 2108 } 2109 case OMPRTL__kmpc_end_reduce: { 2110 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2111 // kmp_critical_name *lck); 2112 llvm::Type *TypeParams[] = { 2113 getIdentTyPointerTy(), CGM.Int32Ty, 2114 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2115 auto *FnTy = 2116 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2117 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2118 break; 2119 } 2120 case OMPRTL__kmpc_end_reduce_nowait: { 2121 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2122 // kmp_critical_name *lck); 2123 llvm::Type *TypeParams[] = { 2124 getIdentTyPointerTy(), CGM.Int32Ty, 2125 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2126 auto *FnTy = 2127 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2128 RTLFn = 2129 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2130 break; 2131 } 2132 case OMPRTL__kmpc_omp_task_begin_if0: { 
2133 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2134 // *new_task); 2135 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2136 CGM.VoidPtrTy}; 2137 auto *FnTy = 2138 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2139 RTLFn = 2140 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2141 break; 2142 } 2143 case OMPRTL__kmpc_omp_task_complete_if0: { 2144 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2145 // *new_task); 2146 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2147 CGM.VoidPtrTy}; 2148 auto *FnTy = 2149 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2150 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2151 /*Name=*/"__kmpc_omp_task_complete_if0"); 2152 break; 2153 } 2154 case OMPRTL__kmpc_ordered: { 2155 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2156 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2157 auto *FnTy = 2158 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2159 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2160 break; 2161 } 2162 case OMPRTL__kmpc_end_ordered: { 2163 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2164 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2165 auto *FnTy = 2166 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2167 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2168 break; 2169 } 2170 case OMPRTL__kmpc_omp_taskwait: { 2171 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2172 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2173 auto *FnTy = 2174 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2175 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2176 break; 2177 } 2178 case OMPRTL__kmpc_taskgroup: { 2179 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2180 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2181 auto *FnTy = 2182 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2183 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2184 break; 2185 } 2186 case OMPRTL__kmpc_end_taskgroup: { 2187 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2188 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2189 auto *FnTy = 2190 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2191 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2192 break; 2193 } 2194 case OMPRTL__kmpc_push_proc_bind: { 2195 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2196 // int proc_bind) 2197 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2198 auto *FnTy = 2199 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2200 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2201 break; 2202 } 2203 case OMPRTL__kmpc_omp_task_with_deps: { 2204 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2205 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2206 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2207 llvm::Type *TypeParams[] = { 2208 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2209 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2210 auto *FnTy = 2211 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2212 RTLFn = 2213 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2214 break; 2215 } 2216 case OMPRTL__kmpc_omp_wait_deps: { 2217 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2218 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2219 // kmp_depend_info_t *noalias_dep_list); 2220 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2221 CGM.Int32Ty, CGM.VoidPtrTy, 2222 
CGM.Int32Ty, CGM.VoidPtrTy}; 2223 auto *FnTy = 2224 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2225 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2226 break; 2227 } 2228 case OMPRTL__kmpc_cancellationpoint: { 2229 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2230 // global_tid, kmp_int32 cncl_kind) 2231 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2232 auto *FnTy = 2233 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2234 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2235 break; 2236 } 2237 case OMPRTL__kmpc_cancel: { 2238 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2239 // kmp_int32 cncl_kind) 2240 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2241 auto *FnTy = 2242 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2243 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2244 break; 2245 } 2246 case OMPRTL__kmpc_push_num_teams: { 2247 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2248 // kmp_int32 num_teams, kmp_int32 num_threads) 2249 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2250 CGM.Int32Ty}; 2251 auto *FnTy = 2252 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2253 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2254 break; 2255 } 2256 case OMPRTL__kmpc_fork_teams: { 2257 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2258 // microtask, ...); 2259 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2260 getKmpc_MicroPointerTy()}; 2261 auto *FnTy = 2262 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2263 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2264 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2265 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2266 llvm::LLVMContext &Ctx = F->getContext(); 2267 llvm::MDBuilder MDB(Ctx); 2268 // Annotate the callback behavior of the __kmpc_fork_teams: 2269 // - The callback callee is argument number 2 (microtask). 2270 // - The first two arguments of the callback callee are unknown (-1). 2271 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2272 // callback callee. 2273 F->addMetadata( 2274 llvm::LLVMContext::MD_callback, 2275 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2276 2, {-1, -1}, 2277 /* VarArgsArePassed */ true)})); 2278 } 2279 } 2280 break; 2281 } 2282 case OMPRTL__kmpc_taskloop: { 2283 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2284 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2285 // sched, kmp_uint64 grainsize, void *task_dup); 2286 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2287 CGM.IntTy, 2288 CGM.VoidPtrTy, 2289 CGM.IntTy, 2290 CGM.Int64Ty->getPointerTo(), 2291 CGM.Int64Ty->getPointerTo(), 2292 CGM.Int64Ty, 2293 CGM.IntTy, 2294 CGM.IntTy, 2295 CGM.Int64Ty, 2296 CGM.VoidPtrTy}; 2297 auto *FnTy = 2298 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2299 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2300 break; 2301 } 2302 case OMPRTL__kmpc_doacross_init: { 2303 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2304 // num_dims, struct kmp_dim *dims); 2305 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2306 CGM.Int32Ty, 2307 CGM.Int32Ty, 2308 CGM.VoidPtrTy}; 2309 auto *FnTy = 2310 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2311 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2312 break; 2313 } 2314 case OMPRTL__kmpc_doacross_fini: { 2315 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2316 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2317 auto *FnTy 
= 2318 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2319 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2320 break; 2321 } 2322 case OMPRTL__kmpc_doacross_post: { 2323 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2324 // *vec); 2325 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2326 CGM.Int64Ty->getPointerTo()}; 2327 auto *FnTy = 2328 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2329 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2330 break; 2331 } 2332 case OMPRTL__kmpc_doacross_wait: { 2333 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2334 // *vec); 2335 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2336 CGM.Int64Ty->getPointerTo()}; 2337 auto *FnTy = 2338 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2339 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2340 break; 2341 } 2342 case OMPRTL__kmpc_taskred_init: { 2343 // Build void *__kmpc_taskred_init(int gtid, int num_data, void *data); 2344 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2345 auto *FnTy = 2346 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2347 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskred_init"); 2348 break; 2349 } 2350 case OMPRTL__kmpc_task_reduction_get_th_data: { 2351 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2352 // *d); 2353 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2354 auto *FnTy = 2355 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2356 RTLFn = CGM.CreateRuntimeFunction( 2357 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2358 break; 2359 } 2360 case OMPRTL__kmpc_taskred_modifier_init: { 2361 // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 2362 // is_ws, int num_data, 
void *data); 2363 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy, 2364 CGM.IntTy, CGM.VoidPtrTy}; 2365 auto *FnTy = 2366 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2367 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2368 /*Name=*/"__kmpc_taskred_modifier_init"); 2369 break; 2370 } 2371 case OMPRTL__kmpc_task_reduction_modifier_fini: { 2372 // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, 2373 // int is_ws); 2374 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy}; 2375 auto *FnTy = 2376 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2377 RTLFn = CGM.CreateRuntimeFunction( 2378 FnTy, 2379 /*Name=*/"__kmpc_task_reduction_modifier_fini"); 2380 break; 2381 } 2382 case OMPRTL__kmpc_alloc: { 2383 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2384 // al); omp_allocator_handle_t type is void *. 2385 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2386 auto *FnTy = 2387 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2388 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2389 break; 2390 } 2391 case OMPRTL__kmpc_free: { 2392 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2393 // al); omp_allocator_handle_t type is void *. 2394 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2395 auto *FnTy = 2396 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2397 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2398 break; 2399 } 2400 case OMPRTL__kmpc_init_allocator: { 2401 // Build omp_allocator_handle_t __kmpc_init_allocator(int gtid, 2402 // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]); 2403 // omp_allocator_handle_t type is void*, omp_memspace_handle_t type is 2404 // void*. 
2405 auto *FnTy = llvm::FunctionType::get( 2406 CGM.VoidPtrTy, {CGM.IntTy, CGM.VoidPtrTy, CGM.IntTy, CGM.VoidPtrTy}, 2407 /*isVarArg=*/false); 2408 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_init_allocator"); 2409 break; 2410 } 2411 case OMPRTL__kmpc_destroy_allocator: { 2412 // Build void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al); 2413 // omp_allocator_handle_t type is void*. 2414 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, {CGM.IntTy, CGM.VoidPtrTy}, 2415 /*isVarArg=*/false); 2416 RTLFn = 2417 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_destroy_allocator"); 2418 break; 2419 } 2420 case OMPRTL__kmpc_push_target_tripcount: { 2421 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2422 // size); 2423 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2424 llvm::FunctionType *FnTy = 2425 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2426 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2427 break; 2428 } 2429 case OMPRTL__tgt_target: { 2430 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2431 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2432 // *arg_types); 2433 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2434 CGM.VoidPtrTy, 2435 CGM.Int32Ty, 2436 CGM.VoidPtrPtrTy, 2437 CGM.VoidPtrPtrTy, 2438 CGM.Int64Ty->getPointerTo(), 2439 CGM.Int64Ty->getPointerTo()}; 2440 auto *FnTy = 2441 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2442 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2443 break; 2444 } 2445 case OMPRTL__tgt_target_nowait: { 2446 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2447 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2448 // int64_t *arg_types); 2449 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2450 CGM.VoidPtrTy, 2451 CGM.Int32Ty, 2452 CGM.VoidPtrPtrTy, 2453 CGM.VoidPtrPtrTy, 2454 CGM.Int64Ty->getPointerTo(), 
2455 CGM.Int64Ty->getPointerTo()}; 2456 auto *FnTy = 2457 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2458 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2459 break; 2460 } 2461 case OMPRTL__tgt_target_teams: { 2462 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2463 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2464 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2465 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2466 CGM.VoidPtrTy, 2467 CGM.Int32Ty, 2468 CGM.VoidPtrPtrTy, 2469 CGM.VoidPtrPtrTy, 2470 CGM.Int64Ty->getPointerTo(), 2471 CGM.Int64Ty->getPointerTo(), 2472 CGM.Int32Ty, 2473 CGM.Int32Ty}; 2474 auto *FnTy = 2475 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2476 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2477 break; 2478 } 2479 case OMPRTL__tgt_target_teams_nowait: { 2480 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2481 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2482 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2483 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2484 CGM.VoidPtrTy, 2485 CGM.Int32Ty, 2486 CGM.VoidPtrPtrTy, 2487 CGM.VoidPtrPtrTy, 2488 CGM.Int64Ty->getPointerTo(), 2489 CGM.Int64Ty->getPointerTo(), 2490 CGM.Int32Ty, 2491 CGM.Int32Ty}; 2492 auto *FnTy = 2493 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2494 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2495 break; 2496 } 2497 case OMPRTL__tgt_register_requires: { 2498 // Build void __tgt_register_requires(int64_t flags); 2499 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2500 auto *FnTy = 2501 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2502 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2503 break; 2504 } 2505 case OMPRTL__tgt_target_data_begin: { 2506 // Build void 
__tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2507 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2508 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2509 CGM.Int32Ty, 2510 CGM.VoidPtrPtrTy, 2511 CGM.VoidPtrPtrTy, 2512 CGM.Int64Ty->getPointerTo(), 2513 CGM.Int64Ty->getPointerTo()}; 2514 auto *FnTy = 2515 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2516 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2517 break; 2518 } 2519 case OMPRTL__tgt_target_data_begin_nowait: { 2520 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2521 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2522 // *arg_types); 2523 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2524 CGM.Int32Ty, 2525 CGM.VoidPtrPtrTy, 2526 CGM.VoidPtrPtrTy, 2527 CGM.Int64Ty->getPointerTo(), 2528 CGM.Int64Ty->getPointerTo()}; 2529 auto *FnTy = 2530 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2531 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2532 break; 2533 } 2534 case OMPRTL__tgt_target_data_end: { 2535 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2536 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2537 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2538 CGM.Int32Ty, 2539 CGM.VoidPtrPtrTy, 2540 CGM.VoidPtrPtrTy, 2541 CGM.Int64Ty->getPointerTo(), 2542 CGM.Int64Ty->getPointerTo()}; 2543 auto *FnTy = 2544 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2545 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2546 break; 2547 } 2548 case OMPRTL__tgt_target_data_end_nowait: { 2549 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2550 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2551 // *arg_types); 2552 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2553 CGM.Int32Ty, 2554 CGM.VoidPtrPtrTy, 2555 CGM.VoidPtrPtrTy, 2556 
CGM.Int64Ty->getPointerTo(), 2557 CGM.Int64Ty->getPointerTo()}; 2558 auto *FnTy = 2559 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2560 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2561 break; 2562 } 2563 case OMPRTL__tgt_target_data_update: { 2564 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2565 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2566 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2567 CGM.Int32Ty, 2568 CGM.VoidPtrPtrTy, 2569 CGM.VoidPtrPtrTy, 2570 CGM.Int64Ty->getPointerTo(), 2571 CGM.Int64Ty->getPointerTo()}; 2572 auto *FnTy = 2573 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2574 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2575 break; 2576 } 2577 case OMPRTL__tgt_target_data_update_nowait: { 2578 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2579 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2580 // *arg_types); 2581 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2582 CGM.Int32Ty, 2583 CGM.VoidPtrPtrTy, 2584 CGM.VoidPtrPtrTy, 2585 CGM.Int64Ty->getPointerTo(), 2586 CGM.Int64Ty->getPointerTo()}; 2587 auto *FnTy = 2588 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2589 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2590 break; 2591 } 2592 case OMPRTL__tgt_mapper_num_components: { 2593 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2594 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2595 auto *FnTy = 2596 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2597 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2598 break; 2599 } 2600 case OMPRTL__tgt_push_mapper_component: { 2601 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2602 // *base, void *begin, int64_t size, int64_t type); 2603 llvm::Type *TypeParams[] = 
{CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2604 CGM.Int64Ty, CGM.Int64Ty}; 2605 auto *FnTy = 2606 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2607 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2608 break; 2609 } 2610 case OMPRTL__kmpc_task_allow_completion_event: { 2611 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 2612 // int gtid, kmp_task_t *task); 2613 auto *FnTy = llvm::FunctionType::get( 2614 CGM.VoidPtrTy, {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy}, 2615 /*isVarArg=*/false); 2616 RTLFn = 2617 CGM.CreateRuntimeFunction(FnTy, "__kmpc_task_allow_completion_event"); 2618 break; 2619 } 2620 } 2621 assert(RTLFn && "Unable to find OpenMP runtime function"); 2622 return RTLFn; 2623 } 2624 2625 llvm::FunctionCallee 2626 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2627 assert((IVSize == 32 || IVSize == 64) && 2628 "IV size is not compatible with the omp runtime"); 2629 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2630 : "__kmpc_for_static_init_4u") 2631 : (IVSigned ? "__kmpc_for_static_init_8" 2632 : "__kmpc_for_static_init_8u"); 2633 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2634 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2635 llvm::Type *TypeParams[] = { 2636 getIdentTyPointerTy(), // loc 2637 CGM.Int32Ty, // tid 2638 CGM.Int32Ty, // schedtype 2639 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2640 PtrTy, // p_lower 2641 PtrTy, // p_upper 2642 PtrTy, // p_stride 2643 ITy, // incr 2644 ITy // chunk 2645 }; 2646 auto *FnTy = 2647 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2648 return CGM.CreateRuntimeFunction(FnTy, Name); 2649 } 2650 2651 llvm::FunctionCallee 2652 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2653 assert((IVSize == 32 || IVSize == 64) && 2654 "IV size is not compatible with the omp runtime"); 2655 StringRef Name = 2656 IVSize == 32 2657 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2658 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2659 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2660 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2661 CGM.Int32Ty, // tid 2662 CGM.Int32Ty, // schedtype 2663 ITy, // lower 2664 ITy, // upper 2665 ITy, // stride 2666 ITy // chunk 2667 }; 2668 auto *FnTy = 2669 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2670 return CGM.CreateRuntimeFunction(FnTy, Name); 2671 } 2672 2673 llvm::FunctionCallee 2674 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2675 assert((IVSize == 32 || IVSize == 64) && 2676 "IV size is not compatible with the omp runtime"); 2677 StringRef Name = 2678 IVSize == 32 2679 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2680 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2681 llvm::Type *TypeParams[] = { 2682 getIdentTyPointerTy(), // loc 2683 CGM.Int32Ty, // tid 2684 }; 2685 auto *FnTy = 2686 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2687 return CGM.CreateRuntimeFunction(FnTy, Name); 2688 } 2689 2690 llvm::FunctionCallee 2691 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2692 assert((IVSize == 32 || IVSize == 64) && 2693 "IV size is not compatible with the omp runtime"); 2694 StringRef Name = 2695 IVSize == 32 2696 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2697 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2698 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2699 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2700 llvm::Type *TypeParams[] = { 2701 getIdentTyPointerTy(), // loc 2702 CGM.Int32Ty, // tid 2703 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2704 PtrTy, // p_lower 2705 PtrTy, // p_upper 2706 PtrTy // p_stride 2707 }; 2708 auto *FnTy = 2709 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2710 return CGM.CreateRuntimeFunction(FnTy, Name); 2711 } 2712 2713 /// Obtain information that uniquely identifies a target entry. This 2714 /// consists of the file and device IDs as well as line number associated with 2715 /// the relevant entry source location. 
2716 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2717 unsigned &DeviceID, unsigned &FileID, 2718 unsigned &LineNum) { 2719 SourceManager &SM = C.getSourceManager(); 2720 2721 // The loc should be always valid and have a file ID (the user cannot use 2722 // #pragma directives in macros) 2723 2724 assert(Loc.isValid() && "Source location is expected to be always valid."); 2725 2726 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2727 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2728 2729 llvm::sys::fs::UniqueID ID; 2730 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2731 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2732 << PLoc.getFilename() << EC.message(); 2733 2734 DeviceID = ID.getDevice(); 2735 FileID = ID.getFile(); 2736 LineNum = PLoc.getLine(); 2737 } 2738 2739 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2740 if (CGM.getLangOpts().OpenMPSimd) 2741 return Address::invalid(); 2742 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2743 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2744 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2745 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2746 HasRequiresUnifiedSharedMemory))) { 2747 SmallString<64> PtrName; 2748 { 2749 llvm::raw_svector_ostream OS(PtrName); 2750 OS << CGM.getMangledName(GlobalDecl(VD)); 2751 if (!VD->isExternallyVisible()) { 2752 unsigned DeviceID, FileID, Line; 2753 getTargetEntryUniqueInfo(CGM.getContext(), 2754 VD->getCanonicalDecl()->getBeginLoc(), 2755 DeviceID, FileID, Line); 2756 OS << llvm::format("_%x", FileID); 2757 } 2758 OS << "_decl_tgt_ref_ptr"; 2759 } 2760 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2761 if (!Ptr) { 2762 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2763 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2764 PtrName); 2765 2766 auto *GV = cast<llvm::GlobalVariable>(Ptr); 
2767 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2768 2769 if (!CGM.getLangOpts().OpenMPIsDevice) 2770 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2771 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2772 } 2773 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2774 } 2775 return Address::invalid(); 2776 } 2777 2778 llvm::Constant * 2779 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2780 assert(!CGM.getLangOpts().OpenMPUseTLS || 2781 !CGM.getContext().getTargetInfo().isTLSSupported()); 2782 // Lookup the entry, lazily creating it if necessary. 2783 std::string Suffix = getName({"cache", ""}); 2784 return getOrCreateInternalVariable( 2785 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2786 } 2787 2788 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2789 const VarDecl *VD, 2790 Address VDAddr, 2791 SourceLocation Loc) { 2792 if (CGM.getLangOpts().OpenMPUseTLS && 2793 CGM.getContext().getTargetInfo().isTLSSupported()) 2794 return VDAddr; 2795 2796 llvm::Type *VarTy = VDAddr.getElementType(); 2797 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2798 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2799 CGM.Int8PtrTy), 2800 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2801 getOrCreateThreadPrivateCache(VD)}; 2802 return Address(CGF.EmitRuntimeCall( 2803 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2804 VDAddr.getAlignment()); 2805 } 2806 2807 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2808 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2809 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2810 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2811 // library. 
2812 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2813 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2814 OMPLoc); 2815 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2816 // to register constructor/destructor for variable. 2817 llvm::Value *Args[] = { 2818 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2819 Ctor, CopyCtor, Dtor}; 2820 CGF.EmitRuntimeCall( 2821 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2822 } 2823 2824 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2825 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2826 bool PerformInit, CodeGenFunction *CGF) { 2827 if (CGM.getLangOpts().OpenMPUseTLS && 2828 CGM.getContext().getTargetInfo().isTLSSupported()) 2829 return nullptr; 2830 2831 VD = VD->getDefinition(CGM.getContext()); 2832 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2833 QualType ASTTy = VD->getType(); 2834 2835 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2836 const Expr *Init = VD->getAnyInitializer(); 2837 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2838 // Generate function that re-emits the declaration's initializer into the 2839 // threadprivate copy of the variable VD 2840 CodeGenFunction CtorCGF(CGM); 2841 FunctionArgList Args; 2842 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2843 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2844 ImplicitParamDecl::Other); 2845 Args.push_back(&Dst); 2846 2847 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2848 CGM.getContext().VoidPtrTy, Args); 2849 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2850 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2851 llvm::Function *Fn = 2852 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2853 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2854 Args, Loc, Loc); 2855 
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2856 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2857 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2858 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2859 Arg = CtorCGF.Builder.CreateElementBitCast( 2860 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2861 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2862 /*IsInitializer=*/true); 2863 ArgVal = CtorCGF.EmitLoadOfScalar( 2864 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2865 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2866 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2867 CtorCGF.FinishFunction(); 2868 Ctor = Fn; 2869 } 2870 if (VD->getType().isDestructedType() != QualType::DK_none) { 2871 // Generate function that emits destructor call for the threadprivate copy 2872 // of the variable VD 2873 CodeGenFunction DtorCGF(CGM); 2874 FunctionArgList Args; 2875 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2876 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2877 ImplicitParamDecl::Other); 2878 Args.push_back(&Dst); 2879 2880 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2881 CGM.getContext().VoidTy, Args); 2882 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2883 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2884 llvm::Function *Fn = 2885 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2886 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2887 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2888 Loc, Loc); 2889 // Create a scope with an artificial location for the body of this function. 
2890 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2891 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2892 DtorCGF.GetAddrOfLocalVar(&Dst), 2893 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2894 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2895 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2896 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2897 DtorCGF.FinishFunction(); 2898 Dtor = Fn; 2899 } 2900 // Do not emit init function if it is not required. 2901 if (!Ctor && !Dtor) 2902 return nullptr; 2903 2904 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2905 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2906 /*isVarArg=*/false) 2907 ->getPointerTo(); 2908 // Copying constructor for the threadprivate variable. 2909 // Must be NULL - reserved by runtime, but currently it requires that this 2910 // parameter is always NULL. Otherwise it fires assertion. 2911 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2912 if (Ctor == nullptr) { 2913 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2914 /*isVarArg=*/false) 2915 ->getPointerTo(); 2916 Ctor = llvm::Constant::getNullValue(CtorTy); 2917 } 2918 if (Dtor == nullptr) { 2919 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2920 /*isVarArg=*/false) 2921 ->getPointerTo(); 2922 Dtor = llvm::Constant::getNullValue(DtorTy); 2923 } 2924 if (!CGF) { 2925 auto *InitFunctionTy = 2926 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2927 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2928 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2929 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2930 CodeGenFunction InitCGF(CGM); 2931 FunctionArgList ArgList; 2932 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2933 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2934 Loc, Loc); 2935 
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2936 InitCGF.FinishFunction(); 2937 return InitFunction; 2938 } 2939 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2940 } 2941 return nullptr; 2942 } 2943 2944 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2945 llvm::GlobalVariable *Addr, 2946 bool PerformInit) { 2947 if (CGM.getLangOpts().OMPTargetTriples.empty() && 2948 !CGM.getLangOpts().OpenMPIsDevice) 2949 return false; 2950 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2951 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2952 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2953 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2954 HasRequiresUnifiedSharedMemory)) 2955 return CGM.getLangOpts().OpenMPIsDevice; 2956 VD = VD->getDefinition(CGM.getContext()); 2957 assert(VD && "Unknown VarDecl"); 2958 2959 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2960 return CGM.getLangOpts().OpenMPIsDevice; 2961 2962 QualType ASTTy = VD->getType(); 2963 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2964 2965 // Produce the unique prefix to identify the new target regions. We use 2966 // the source location of the variable declaration which we know to not 2967 // conflict with any target region. 
2968 unsigned DeviceID; 2969 unsigned FileID; 2970 unsigned Line; 2971 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2972 SmallString<128> Buffer, Out; 2973 { 2974 llvm::raw_svector_ostream OS(Buffer); 2975 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2976 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2977 } 2978 2979 const Expr *Init = VD->getAnyInitializer(); 2980 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2981 llvm::Constant *Ctor; 2982 llvm::Constant *ID; 2983 if (CGM.getLangOpts().OpenMPIsDevice) { 2984 // Generate function that re-emits the declaration's initializer into 2985 // the threadprivate copy of the variable VD 2986 CodeGenFunction CtorCGF(CGM); 2987 2988 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2989 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2990 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2991 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2992 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2993 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2994 FunctionArgList(), Loc, Loc); 2995 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2996 CtorCGF.EmitAnyExprToMem(Init, 2997 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2998 Init->getType().getQualifiers(), 2999 /*IsInitializer=*/true); 3000 CtorCGF.FinishFunction(); 3001 Ctor = Fn; 3002 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 3003 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 3004 } else { 3005 Ctor = new llvm::GlobalVariable( 3006 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 3007 llvm::GlobalValue::PrivateLinkage, 3008 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 3009 ID = Ctor; 3010 } 3011 3012 // Register the information for the entry associated with the constructor. 
3013 Out.clear(); 3014 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 3015 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 3016 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 3017 } 3018 if (VD->getType().isDestructedType() != QualType::DK_none) { 3019 llvm::Constant *Dtor; 3020 llvm::Constant *ID; 3021 if (CGM.getLangOpts().OpenMPIsDevice) { 3022 // Generate function that emits destructor call for the threadprivate 3023 // copy of the variable VD 3024 CodeGenFunction DtorCGF(CGM); 3025 3026 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 3027 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3028 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 3029 FTy, Twine(Buffer, "_dtor"), FI, Loc); 3030 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 3031 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 3032 FunctionArgList(), Loc, Loc); 3033 // Create a scope with an artificial location for the body of this 3034 // function. 3035 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 3036 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 3037 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 3038 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 3039 DtorCGF.FinishFunction(); 3040 Dtor = Fn; 3041 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 3042 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 3043 } else { 3044 Dtor = new llvm::GlobalVariable( 3045 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 3046 llvm::GlobalValue::PrivateLinkage, 3047 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 3048 ID = Dtor; 3049 } 3050 // Register the information for the entry associated with the destructor. 
3051 Out.clear(); 3052 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 3053 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 3054 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 3055 } 3056 return CGM.getLangOpts().OpenMPIsDevice; 3057 } 3058 3059 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 3060 QualType VarType, 3061 StringRef Name) { 3062 std::string Suffix = getName({"artificial", ""}); 3063 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 3064 llvm::Value *GAddr = 3065 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 3066 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 3067 CGM.getTarget().isTLSSupported()) { 3068 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 3069 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 3070 } 3071 std::string CacheSuffix = getName({"cache", ""}); 3072 llvm::Value *Args[] = { 3073 emitUpdateLocation(CGF, SourceLocation()), 3074 getThreadID(CGF, SourceLocation()), 3075 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 3076 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 3077 /*isSigned=*/false), 3078 getOrCreateInternalVariable( 3079 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 3080 return Address( 3081 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3082 CGF.EmitRuntimeCall( 3083 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 3084 VarLVType->getPointerTo(/*AddrSpace=*/0)), 3085 CGM.getContext().getTypeAlignInChars(VarType)); 3086 } 3087 3088 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 3089 const RegionCodeGenTy &ThenGen, 3090 const RegionCodeGenTy &ElseGen) { 3091 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 3092 3093 // If the condition constant folds and can be elided, try to avoid emitting 3094 // the condition and the dead arm of the 
if/else. 3095 bool CondConstant; 3096 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 3097 if (CondConstant) 3098 ThenGen(CGF); 3099 else 3100 ElseGen(CGF); 3101 return; 3102 } 3103 3104 // Otherwise, the condition did not fold, or we couldn't elide it. Just 3105 // emit the conditional branch. 3106 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3107 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 3108 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 3109 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 3110 3111 // Emit the 'then' code. 3112 CGF.EmitBlock(ThenBlock); 3113 ThenGen(CGF); 3114 CGF.EmitBranch(ContBlock); 3115 // Emit the 'else' code if present. 3116 // There is no need to emit line number for unconditional branch. 3117 (void)ApplyDebugLocation::CreateEmpty(CGF); 3118 CGF.EmitBlock(ElseBlock); 3119 ElseGen(CGF); 3120 // There is no need to emit line number for unconditional branch. 3121 (void)ApplyDebugLocation::CreateEmpty(CGF); 3122 CGF.EmitBranch(ContBlock); 3123 // Emit the continuation block for code after the if. 
3124 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 3125 } 3126 3127 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 3128 llvm::Function *OutlinedFn, 3129 ArrayRef<llvm::Value *> CapturedVars, 3130 const Expr *IfCond) { 3131 if (!CGF.HaveInsertPoint()) 3132 return; 3133 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 3134 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 3135 PrePostActionTy &) { 3136 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 3137 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3138 llvm::Value *Args[] = { 3139 RTLoc, 3140 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3141 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3142 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3143 RealArgs.append(std::begin(Args), std::end(Args)); 3144 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3145 3146 llvm::FunctionCallee RTLFn = 3147 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3148 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3149 }; 3150 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3151 PrePostActionTy &) { 3152 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3153 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3154 // Build calls: 3155 // __kmpc_serialized_parallel(&Loc, GTid); 3156 llvm::Value *Args[] = {RTLoc, ThreadID}; 3157 CGF.EmitRuntimeCall( 3158 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3159 3160 // OutlinedFn(>id, &zero_bound, CapturedStruct); 3161 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 3162 Address ZeroAddrBound = 3163 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3164 /*Name=*/".bound.zero.addr"); 3165 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 3166 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3167 // ThreadId for serialized parallels is 0. 
3168 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 3169 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 3170 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3171 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3172 3173 // __kmpc_end_serialized_parallel(&Loc, GTid); 3174 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3175 CGF.EmitRuntimeCall( 3176 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3177 EndArgs); 3178 }; 3179 if (IfCond) { 3180 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 3181 } else { 3182 RegionCodeGenTy ThenRCG(ThenGen); 3183 ThenRCG(CGF); 3184 } 3185 } 3186 3187 // If we're inside an (outlined) parallel region, use the region info's 3188 // thread-ID variable (it is passed in a first argument of the outlined function 3189 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3190 // regular serial code region, get thread ID by calling kmp_int32 3191 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3192 // return the address of that temp. 
3193 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3194 SourceLocation Loc) { 3195 if (auto *OMPRegionInfo = 3196 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3197 if (OMPRegionInfo->getThreadIDVariable()) 3198 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 3199 3200 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3201 QualType Int32Ty = 3202 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3203 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3204 CGF.EmitStoreOfScalar(ThreadID, 3205 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3206 3207 return ThreadIDTemp; 3208 } 3209 3210 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3211 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3212 SmallString<256> Buffer; 3213 llvm::raw_svector_ostream Out(Buffer); 3214 Out << Name; 3215 StringRef RuntimeName = Out.str(); 3216 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3217 if (Elem.second) { 3218 assert(Elem.second->getType()->getPointerElementType() == Ty && 3219 "OMP internal variable has different type than requested"); 3220 return &*Elem.second; 3221 } 3222 3223 return Elem.second = new llvm::GlobalVariable( 3224 CGM.getModule(), Ty, /*IsConstant*/ false, 3225 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3226 Elem.first(), /*InsertBefore=*/nullptr, 3227 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3228 } 3229 3230 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3231 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3232 std::string Name = getName({Prefix, "var"}); 3233 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3234 } 3235 3236 namespace { 3237 /// Common pre(post)-action for different OpenMP constructs. 
3238 class CommonActionTy final : public PrePostActionTy { 3239 llvm::FunctionCallee EnterCallee; 3240 ArrayRef<llvm::Value *> EnterArgs; 3241 llvm::FunctionCallee ExitCallee; 3242 ArrayRef<llvm::Value *> ExitArgs; 3243 bool Conditional; 3244 llvm::BasicBlock *ContBlock = nullptr; 3245 3246 public: 3247 CommonActionTy(llvm::FunctionCallee EnterCallee, 3248 ArrayRef<llvm::Value *> EnterArgs, 3249 llvm::FunctionCallee ExitCallee, 3250 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3251 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3252 ExitArgs(ExitArgs), Conditional(Conditional) {} 3253 void Enter(CodeGenFunction &CGF) override { 3254 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3255 if (Conditional) { 3256 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3257 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3258 ContBlock = CGF.createBasicBlock("omp_if.end"); 3259 // Generate the branch (If-stmt) 3260 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3261 CGF.EmitBlock(ThenBlock); 3262 } 3263 } 3264 void Done(CodeGenFunction &CGF) { 3265 // Emit the rest of blocks/branches 3266 CGF.EmitBranch(ContBlock); 3267 CGF.EmitBlock(ContBlock, true); 3268 } 3269 void Exit(CodeGenFunction &CGF) override { 3270 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3271 } 3272 }; 3273 } // anonymous namespace 3274 3275 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3276 StringRef CriticalName, 3277 const RegionCodeGenTy &CriticalOpGen, 3278 SourceLocation Loc, const Expr *Hint) { 3279 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3280 // CriticalOpGen(); 3281 // __kmpc_end_critical(ident_t *, gtid, Lock); 3282 // Prepare arguments and build a call to __kmpc_critical 3283 if (!CGF.HaveInsertPoint()) 3284 return; 3285 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3286 getCriticalRegionLock(CriticalName)}; 3287 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3288 std::end(Args)); 3289 if (Hint) { 3290 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3291 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3292 } 3293 CommonActionTy Action( 3294 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3295 : OMPRTL__kmpc_critical), 3296 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3297 CriticalOpGen.setAction(Action); 3298 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3299 } 3300 3301 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3302 const RegionCodeGenTy &MasterOpGen, 3303 SourceLocation Loc) { 3304 if (!CGF.HaveInsertPoint()) 3305 return; 3306 // if(__kmpc_master(ident_t *, gtid)) { 3307 // MasterOpGen(); 3308 // __kmpc_end_master(ident_t *, gtid); 3309 // } 3310 // Prepare arguments and build a call to __kmpc_master 3311 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3312 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3313 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3314 /*Conditional=*/true); 3315 MasterOpGen.setAction(Action); 3316 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3317 Action.Done(CGF); 3318 } 3319 3320 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3321 SourceLocation Loc) { 3322 if (!CGF.HaveInsertPoint()) 3323 return; 3324 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 3325 if (OMPBuilder) { 3326 OMPBuilder->CreateTaskyield(CGF.Builder); 3327 } else { 3328 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3329 llvm::Value *Args[] = { 3330 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3331 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3332 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), 3333 Args); 3334 } 3335 3336 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3337 
Region->emitUntiedSwitch(CGF); 3338 } 3339 3340 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3341 const RegionCodeGenTy &TaskgroupOpGen, 3342 SourceLocation Loc) { 3343 if (!CGF.HaveInsertPoint()) 3344 return; 3345 // __kmpc_taskgroup(ident_t *, gtid); 3346 // TaskgroupOpGen(); 3347 // __kmpc_end_taskgroup(ident_t *, gtid); 3348 // Prepare arguments and build a call to __kmpc_taskgroup 3349 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3350 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3351 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3352 Args); 3353 TaskgroupOpGen.setAction(Action); 3354 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3355 } 3356 3357 /// Given an array of pointers to variables, project the address of a 3358 /// given variable. 3359 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3360 unsigned Index, const VarDecl *Var) { 3361 // Pull out the pointer to the variable. 
3362 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3363 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3364 3365 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3366 Addr = CGF.Builder.CreateElementBitCast( 3367 Addr, CGF.ConvertTypeForMem(Var->getType())); 3368 return Addr; 3369 } 3370 3371 static llvm::Value *emitCopyprivateCopyFunction( 3372 CodeGenModule &CGM, llvm::Type *ArgsType, 3373 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3374 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3375 SourceLocation Loc) { 3376 ASTContext &C = CGM.getContext(); 3377 // void copy_func(void *LHSArg, void *RHSArg); 3378 FunctionArgList Args; 3379 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3380 ImplicitParamDecl::Other); 3381 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3382 ImplicitParamDecl::Other); 3383 Args.push_back(&LHSArg); 3384 Args.push_back(&RHSArg); 3385 const auto &CGFI = 3386 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3387 std::string Name = 3388 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3389 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3390 llvm::GlobalValue::InternalLinkage, Name, 3391 &CGM.getModule()); 3392 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3393 Fn->setDoesNotRecurse(); 3394 CodeGenFunction CGF(CGM); 3395 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3396 // Dest = (void*[n])(LHSArg); 3397 // Src = (void*[n])(RHSArg); 3398 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3399 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3400 ArgsType), CGF.getPointerAlign()); 3401 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3402 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3403 ArgsType), CGF.getPointerAlign()); 3404 // *(Type0*)Dst[0] = 
*(Type0*)Src[0]; 3405 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3406 // ... 3407 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3408 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3409 const auto *DestVar = 3410 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3411 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3412 3413 const auto *SrcVar = 3414 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3415 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3416 3417 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3418 QualType Type = VD->getType(); 3419 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3420 } 3421 CGF.FinishFunction(); 3422 return Fn; 3423 } 3424 3425 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3426 const RegionCodeGenTy &SingleOpGen, 3427 SourceLocation Loc, 3428 ArrayRef<const Expr *> CopyprivateVars, 3429 ArrayRef<const Expr *> SrcExprs, 3430 ArrayRef<const Expr *> DstExprs, 3431 ArrayRef<const Expr *> AssignmentOps) { 3432 if (!CGF.HaveInsertPoint()) 3433 return; 3434 assert(CopyprivateVars.size() == SrcExprs.size() && 3435 CopyprivateVars.size() == DstExprs.size() && 3436 CopyprivateVars.size() == AssignmentOps.size()); 3437 ASTContext &C = CGM.getContext(); 3438 // int32 did_it = 0; 3439 // if(__kmpc_single(ident_t *, gtid)) { 3440 // SingleOpGen(); 3441 // __kmpc_end_single(ident_t *, gtid); 3442 // did_it = 1; 3443 // } 3444 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3445 // <copy_func>, did_it); 3446 3447 Address DidIt = Address::invalid(); 3448 if (!CopyprivateVars.empty()) { 3449 // int32 did_it = 0; 3450 QualType KmpInt32Ty = 3451 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3452 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3453 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3454 } 3455 // Prepare arguments and build a call to __kmpc_single 3456 llvm::Value 
*Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3457 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3458 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3459 /*Conditional=*/true); 3460 SingleOpGen.setAction(Action); 3461 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3462 if (DidIt.isValid()) { 3463 // did_it = 1; 3464 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3465 } 3466 Action.Done(CGF); 3467 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3468 // <copy_func>, did_it); 3469 if (DidIt.isValid()) { 3470 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3471 QualType CopyprivateArrayTy = C.getConstantArrayType( 3472 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 3473 /*IndexTypeQuals=*/0); 3474 // Create a list of all private variables for copyprivate. 3475 Address CopyprivateList = 3476 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3477 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3478 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3479 CGF.Builder.CreateStore( 3480 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3481 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 3482 CGF.VoidPtrTy), 3483 Elem); 3484 } 3485 // Build function that copies private values from single region to all other 3486 // threads in the corresponding parallel region. 
3487 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3488 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3489 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3490 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3491 Address CL = 3492 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3493 CGF.VoidPtrTy); 3494 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3495 llvm::Value *Args[] = { 3496 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3497 getThreadID(CGF, Loc), // i32 <gtid> 3498 BufSize, // size_t <buf_size> 3499 CL.getPointer(), // void *<copyprivate list> 3500 CpyFn, // void (*) (void *, void *) <copy_func> 3501 DidItVal // i32 did_it 3502 }; 3503 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3504 } 3505 } 3506 3507 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3508 const RegionCodeGenTy &OrderedOpGen, 3509 SourceLocation Loc, bool IsThreads) { 3510 if (!CGF.HaveInsertPoint()) 3511 return; 3512 // __kmpc_ordered(ident_t *, gtid); 3513 // OrderedOpGen(); 3514 // __kmpc_end_ordered(ident_t *, gtid); 3515 // Prepare arguments and build a call to __kmpc_ordered 3516 if (IsThreads) { 3517 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3518 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3519 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3520 Args); 3521 OrderedOpGen.setAction(Action); 3522 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3523 return; 3524 } 3525 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3526 } 3527 3528 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3529 unsigned Flags; 3530 if (Kind == OMPD_for) 3531 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3532 else if (Kind == OMPD_sections) 3533 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3534 else if (Kind == OMPD_single) 3535 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3536 else if (Kind == OMPD_barrier) 
3537 Flags = OMP_IDENT_BARRIER_EXPL; 3538 else 3539 Flags = OMP_IDENT_BARRIER_IMPL; 3540 return Flags; 3541 } 3542 3543 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3544 CodeGenFunction &CGF, const OMPLoopDirective &S, 3545 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3546 // Check if the loop directive is actually a doacross loop directive. In this 3547 // case choose static, 1 schedule. 3548 if (llvm::any_of( 3549 S.getClausesOfKind<OMPOrderedClause>(), 3550 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3551 ScheduleKind = OMPC_SCHEDULE_static; 3552 // Chunk size is 1 in this case. 3553 llvm::APInt ChunkSize(32, 1); 3554 ChunkExpr = IntegerLiteral::Create( 3555 CGF.getContext(), ChunkSize, 3556 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3557 SourceLocation()); 3558 } 3559 } 3560 3561 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3562 OpenMPDirectiveKind Kind, bool EmitChecks, 3563 bool ForceSimpleCall) { 3564 // Check if we should use the OMPBuilder 3565 auto *OMPRegionInfo = 3566 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 3567 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 3568 if (OMPBuilder) { 3569 CGF.Builder.restoreIP(OMPBuilder->CreateBarrier( 3570 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 3571 return; 3572 } 3573 3574 if (!CGF.HaveInsertPoint()) 3575 return; 3576 // Build call __kmpc_cancel_barrier(loc, thread_id); 3577 // Build call __kmpc_barrier(loc, thread_id); 3578 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3579 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3580 // thread_id); 3581 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3582 getThreadID(CGF, Loc)}; 3583 if (OMPRegionInfo) { 3584 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3585 llvm::Value *Result = CGF.EmitRuntimeCall( 3586 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3587 if 
(EmitChecks) { 3588 // if (__kmpc_cancel_barrier()) { 3589 // exit from construct; 3590 // } 3591 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3592 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3593 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3594 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3595 CGF.EmitBlock(ExitBB); 3596 // exit from construct; 3597 CodeGenFunction::JumpDest CancelDestination = 3598 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3599 CGF.EmitBranchThroughCleanup(CancelDestination); 3600 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3601 } 3602 return; 3603 } 3604 } 3605 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3606 } 3607 3608 /// Map the OpenMP loop schedule to the runtime enumeration. 3609 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3610 bool Chunked, bool Ordered) { 3611 switch (ScheduleKind) { 3612 case OMPC_SCHEDULE_static: 3613 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3614 : (Ordered ? OMP_ord_static : OMP_sch_static); 3615 case OMPC_SCHEDULE_dynamic: 3616 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3617 case OMPC_SCHEDULE_guided: 3618 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3619 case OMPC_SCHEDULE_runtime: 3620 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3621 case OMPC_SCHEDULE_auto: 3622 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3623 case OMPC_SCHEDULE_unknown: 3624 assert(!Chunked && "chunk was specified but schedule kind not known"); 3625 return Ordered ? OMP_ord_static : OMP_sch_static; 3626 } 3627 llvm_unreachable("Unexpected runtime schedule"); 3628 } 3629 3630 /// Map the OpenMP distribute schedule to the runtime enumeration. 
3631 static OpenMPSchedType 3632 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3633 // only static is allowed for dist_schedule 3634 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3635 } 3636 3637 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3638 bool Chunked) const { 3639 OpenMPSchedType Schedule = 3640 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3641 return Schedule == OMP_sch_static; 3642 } 3643 3644 bool CGOpenMPRuntime::isStaticNonchunked( 3645 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3646 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3647 return Schedule == OMP_dist_sch_static; 3648 } 3649 3650 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3651 bool Chunked) const { 3652 OpenMPSchedType Schedule = 3653 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3654 return Schedule == OMP_sch_static_chunked; 3655 } 3656 3657 bool CGOpenMPRuntime::isStaticChunked( 3658 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3659 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3660 return Schedule == OMP_dist_sch_static_chunked; 3661 } 3662 3663 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3664 OpenMPSchedType Schedule = 3665 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3666 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3667 return Schedule != OMP_sch_static; 3668 } 3669 3670 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 3671 OpenMPScheduleClauseModifier M1, 3672 OpenMPScheduleClauseModifier M2) { 3673 int Modifier = 0; 3674 switch (M1) { 3675 case OMPC_SCHEDULE_MODIFIER_monotonic: 3676 Modifier = OMP_sch_modifier_monotonic; 3677 break; 3678 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3679 Modifier = OMP_sch_modifier_nonmonotonic; 
3680 break; 3681 case OMPC_SCHEDULE_MODIFIER_simd: 3682 if (Schedule == OMP_sch_static_chunked) 3683 Schedule = OMP_sch_static_balanced_chunked; 3684 break; 3685 case OMPC_SCHEDULE_MODIFIER_last: 3686 case OMPC_SCHEDULE_MODIFIER_unknown: 3687 break; 3688 } 3689 switch (M2) { 3690 case OMPC_SCHEDULE_MODIFIER_monotonic: 3691 Modifier = OMP_sch_modifier_monotonic; 3692 break; 3693 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3694 Modifier = OMP_sch_modifier_nonmonotonic; 3695 break; 3696 case OMPC_SCHEDULE_MODIFIER_simd: 3697 if (Schedule == OMP_sch_static_chunked) 3698 Schedule = OMP_sch_static_balanced_chunked; 3699 break; 3700 case OMPC_SCHEDULE_MODIFIER_last: 3701 case OMPC_SCHEDULE_MODIFIER_unknown: 3702 break; 3703 } 3704 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 3705 // If the static schedule kind is specified or if the ordered clause is 3706 // specified, and if the nonmonotonic modifier is not specified, the effect is 3707 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 3708 // modifier is specified, the effect is as if the nonmonotonic modifier is 3709 // specified. 
3710 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 3711 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 3712 Schedule == OMP_sch_static_balanced_chunked || 3713 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 3714 Schedule == OMP_dist_sch_static_chunked || 3715 Schedule == OMP_dist_sch_static)) 3716 Modifier = OMP_sch_modifier_nonmonotonic; 3717 } 3718 return Schedule | Modifier; 3719 } 3720 3721 void CGOpenMPRuntime::emitForDispatchInit( 3722 CodeGenFunction &CGF, SourceLocation Loc, 3723 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3724 bool Ordered, const DispatchRTInput &DispatchValues) { 3725 if (!CGF.HaveInsertPoint()) 3726 return; 3727 OpenMPSchedType Schedule = getRuntimeSchedule( 3728 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3729 assert(Ordered || 3730 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3731 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3732 Schedule != OMP_sch_static_balanced_chunked)); 3733 // Call __kmpc_dispatch_init( 3734 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3735 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3736 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3737 3738 // If the Chunk was not specified in the clause - use default value 1. 3739 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3740 : CGF.Builder.getIntN(IVSize, 1); 3741 llvm::Value *Args[] = { 3742 emitUpdateLocation(CGF, Loc), 3743 getThreadID(CGF, Loc), 3744 CGF.Builder.getInt32(addMonoNonMonoModifier( 3745 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3746 DispatchValues.LB, // Lower 3747 DispatchValues.UB, // Upper 3748 CGF.Builder.getIntN(IVSize, 1), // Stride 3749 Chunk // Chunk 3750 }; 3751 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3752 } 3753 3754 static void emitForStaticInitCall( 3755 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3756 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3757 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3758 const CGOpenMPRuntime::StaticRTInput &Values) { 3759 if (!CGF.HaveInsertPoint()) 3760 return; 3761 3762 assert(!Values.Ordered); 3763 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3764 Schedule == OMP_sch_static_balanced_chunked || 3765 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3766 Schedule == OMP_dist_sch_static || 3767 Schedule == OMP_dist_sch_static_chunked); 3768 3769 // Call __kmpc_for_static_init( 3770 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3771 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3772 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3773 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3774 llvm::Value *Chunk = Values.Chunk; 3775 if (Chunk == nullptr) { 3776 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3777 Schedule == OMP_dist_sch_static) && 3778 "expected static non-chunked schedule"); 3779 // If the Chunk was not specified in the clause - use default value 1. 
3780 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3781 } else { 3782 assert((Schedule == OMP_sch_static_chunked || 3783 Schedule == OMP_sch_static_balanced_chunked || 3784 Schedule == OMP_ord_static_chunked || 3785 Schedule == OMP_dist_sch_static_chunked) && 3786 "expected static chunked schedule"); 3787 } 3788 llvm::Value *Args[] = { 3789 UpdateLocation, 3790 ThreadId, 3791 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 3792 M2)), // Schedule type 3793 Values.IL.getPointer(), // &isLastIter 3794 Values.LB.getPointer(), // &LB 3795 Values.UB.getPointer(), // &UB 3796 Values.ST.getPointer(), // &Stride 3797 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3798 Chunk // Chunk 3799 }; 3800 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3801 } 3802 3803 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3804 SourceLocation Loc, 3805 OpenMPDirectiveKind DKind, 3806 const OpenMPScheduleTy &ScheduleKind, 3807 const StaticRTInput &Values) { 3808 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3809 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3810 assert(isOpenMPWorksharingDirective(DKind) && 3811 "Expected loop-based or sections-based directive."); 3812 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3813 isOpenMPLoopDirective(DKind) 3814 ? 
OMP_IDENT_WORK_LOOP 3815 : OMP_IDENT_WORK_SECTIONS); 3816 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3817 llvm::FunctionCallee StaticInitFunction = 3818 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3819 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 3820 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3821 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3822 } 3823 3824 void CGOpenMPRuntime::emitDistributeStaticInit( 3825 CodeGenFunction &CGF, SourceLocation Loc, 3826 OpenMPDistScheduleClauseKind SchedKind, 3827 const CGOpenMPRuntime::StaticRTInput &Values) { 3828 OpenMPSchedType ScheduleNum = 3829 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3830 llvm::Value *UpdatedLocation = 3831 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3832 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3833 llvm::FunctionCallee StaticInitFunction = 3834 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3835 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3836 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3837 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3838 } 3839 3840 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3841 SourceLocation Loc, 3842 OpenMPDirectiveKind DKind) { 3843 if (!CGF.HaveInsertPoint()) 3844 return; 3845 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3846 llvm::Value *Args[] = { 3847 emitUpdateLocation(CGF, Loc, 3848 isOpenMPDistributeDirective(DKind) 3849 ? OMP_IDENT_WORK_DISTRIBUTE 3850 : isOpenMPLoopDirective(DKind) 3851 ? 
OMP_IDENT_WORK_LOOP 3852 : OMP_IDENT_WORK_SECTIONS), 3853 getThreadID(CGF, Loc)}; 3854 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 3855 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3856 Args); 3857 } 3858 3859 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3860 SourceLocation Loc, 3861 unsigned IVSize, 3862 bool IVSigned) { 3863 if (!CGF.HaveInsertPoint()) 3864 return; 3865 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3866 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3867 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3868 } 3869 3870 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3871 SourceLocation Loc, unsigned IVSize, 3872 bool IVSigned, Address IL, 3873 Address LB, Address UB, 3874 Address ST) { 3875 // Call __kmpc_dispatch_next( 3876 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3877 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3878 // kmp_int[32|64] *p_stride); 3879 llvm::Value *Args[] = { 3880 emitUpdateLocation(CGF, Loc), 3881 getThreadID(CGF, Loc), 3882 IL.getPointer(), // &isLastIter 3883 LB.getPointer(), // &Lower 3884 UB.getPointer(), // &Upper 3885 ST.getPointer() // &Stride 3886 }; 3887 llvm::Value *Call = 3888 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3889 return CGF.EmitScalarConversion( 3890 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3891 CGF.getContext().BoolTy, Loc); 3892 } 3893 3894 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3895 llvm::Value *NumThreads, 3896 SourceLocation Loc) { 3897 if (!CGF.HaveInsertPoint()) 3898 return; 3899 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3900 llvm::Value *Args[] = { 3901 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3902 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3903 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3904 Args); 3905 } 3906 3907 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3908 ProcBindKind ProcBind, 3909 SourceLocation Loc) { 3910 if (!CGF.HaveInsertPoint()) 3911 return; 3912 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 3913 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3914 llvm::Value *Args[] = { 3915 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3916 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 3917 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3918 } 3919 3920 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3921 SourceLocation Loc, llvm::AtomicOrdering AO) { 3922 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 3923 if (OMPBuilder) { 3924 OMPBuilder->CreateFlush(CGF.Builder); 3925 } else { 3926 if (!CGF.HaveInsertPoint()) 3927 return; 3928 // Build call void __kmpc_flush(ident_t *loc) 3929 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3930 emitUpdateLocation(CGF, Loc)); 3931 } 3932 } 3933 3934 namespace { 3935 /// Indexes of fields for type kmp_task_t. 3936 enum KmpTaskTFields { 3937 /// List of shared variables. 3938 KmpTaskTShareds, 3939 /// Task routine. 3940 KmpTaskTRoutine, 3941 /// Partition id for the untied tasks. 3942 KmpTaskTPartId, 3943 /// Function with call of destructors for private variables. 3944 Data1, 3945 /// Task priority. 3946 Data2, 3947 /// (Taskloops only) Lower bound. 3948 KmpTaskTLowerBound, 3949 /// (Taskloops only) Upper bound. 3950 KmpTaskTUpperBound, 3951 /// (Taskloops only) Stride. 3952 KmpTaskTStride, 3953 /// (Taskloops only) Is last iteration flag. 3954 KmpTaskTLastIter, 3955 /// (Taskloops only) Reduction data. 
3956 KmpTaskTReductions, 3957 }; 3958 } // anonymous namespace 3959 3960 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3961 return OffloadEntriesTargetRegion.empty() && 3962 OffloadEntriesDeviceGlobalVar.empty(); 3963 } 3964 3965 /// Initialize target region entry. 3966 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3967 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3968 StringRef ParentName, unsigned LineNum, 3969 unsigned Order) { 3970 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3971 "only required for the device " 3972 "code generation."); 3973 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3974 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3975 OMPTargetRegionEntryTargetRegion); 3976 ++OffloadingEntriesNum; 3977 } 3978 3979 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3980 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3981 StringRef ParentName, unsigned LineNum, 3982 llvm::Constant *Addr, llvm::Constant *ID, 3983 OMPTargetRegionEntryKind Flags) { 3984 // If we are emitting code for a target, the entry is already initialized, 3985 // only has to be registered. 
3986 if (CGM.getLangOpts().OpenMPIsDevice) { 3987 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3988 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3989 DiagnosticsEngine::Error, 3990 "Unable to find target region on line '%0' in the device code."); 3991 CGM.getDiags().Report(DiagID) << LineNum; 3992 return; 3993 } 3994 auto &Entry = 3995 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3996 assert(Entry.isValid() && "Entry not initialized!"); 3997 Entry.setAddress(Addr); 3998 Entry.setID(ID); 3999 Entry.setFlags(Flags); 4000 } else { 4001 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 4002 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 4003 ++OffloadingEntriesNum; 4004 } 4005 } 4006 4007 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 4008 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4009 unsigned LineNum) const { 4010 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 4011 if (PerDevice == OffloadEntriesTargetRegion.end()) 4012 return false; 4013 auto PerFile = PerDevice->second.find(FileID); 4014 if (PerFile == PerDevice->second.end()) 4015 return false; 4016 auto PerParentName = PerFile->second.find(ParentName); 4017 if (PerParentName == PerFile->second.end()) 4018 return false; 4019 auto PerLine = PerParentName->second.find(LineNum); 4020 if (PerLine == PerParentName->second.end()) 4021 return false; 4022 // Fail if this entry is already registered. 4023 if (PerLine->second.getAddress() || PerLine->second.getID()) 4024 return false; 4025 return true; 4026 } 4027 4028 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 4029 const OffloadTargetRegionEntryInfoActTy &Action) { 4030 // Scan all target region entries and perform the provided action. 
4031 for (const auto &D : OffloadEntriesTargetRegion) 4032 for (const auto &F : D.second) 4033 for (const auto &P : F.second) 4034 for (const auto &L : P.second) 4035 Action(D.first, F.first, P.first(), L.first, L.second); 4036 } 4037 4038 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4039 initializeDeviceGlobalVarEntryInfo(StringRef Name, 4040 OMPTargetGlobalVarEntryKind Flags, 4041 unsigned Order) { 4042 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 4043 "only required for the device " 4044 "code generation."); 4045 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 4046 ++OffloadingEntriesNum; 4047 } 4048 4049 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4050 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 4051 CharUnits VarSize, 4052 OMPTargetGlobalVarEntryKind Flags, 4053 llvm::GlobalValue::LinkageTypes Linkage) { 4054 if (CGM.getLangOpts().OpenMPIsDevice) { 4055 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 4056 assert(Entry.isValid() && Entry.getFlags() == Flags && 4057 "Entry not initialized!"); 4058 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 4059 "Resetting with the new address."); 4060 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 4061 if (Entry.getVarSize().isZero()) { 4062 Entry.setVarSize(VarSize); 4063 Entry.setLinkage(Linkage); 4064 } 4065 return; 4066 } 4067 Entry.setVarSize(VarSize); 4068 Entry.setLinkage(Linkage); 4069 Entry.setAddress(Addr); 4070 } else { 4071 if (hasDeviceGlobalVarEntryInfo(VarName)) { 4072 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 4073 assert(Entry.isValid() && Entry.getFlags() == Flags && 4074 "Entry not initialized!"); 4075 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 4076 "Resetting with the new address."); 4077 if (Entry.getVarSize().isZero()) { 4078 Entry.setVarSize(VarSize); 4079 Entry.setLinkage(Linkage); 4080 } 4081 return; 4082 } 4083 
OffloadEntriesDeviceGlobalVar.try_emplace( 4084 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 4085 ++OffloadingEntriesNum; 4086 } 4087 } 4088 4089 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4090 actOnDeviceGlobalVarEntriesInfo( 4091 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 4092 // Scan all target region entries and perform the provided action. 4093 for (const auto &E : OffloadEntriesDeviceGlobalVar) 4094 Action(E.getKey(), E.getValue()); 4095 } 4096 4097 void CGOpenMPRuntime::createOffloadEntry( 4098 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4099 llvm::GlobalValue::LinkageTypes Linkage) { 4100 StringRef Name = Addr->getName(); 4101 llvm::Module &M = CGM.getModule(); 4102 llvm::LLVMContext &C = M.getContext(); 4103 4104 // Create constant string with the name. 4105 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4106 4107 std::string StringName = getName({"omp_offloading", "entry_name"}); 4108 auto *Str = new llvm::GlobalVariable( 4109 M, StrPtrInit->getType(), /*isConstant=*/true, 4110 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4111 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4112 4113 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4114 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4115 llvm::ConstantInt::get(CGM.SizeTy, Size), 4116 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4117 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4118 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4119 llvm::GlobalVariable *Entry = createGlobalStruct( 4120 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4121 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4122 4123 // The entry has to be created in the section the linker expects it to be. 
4124 Entry->setSection("omp_offloading_entries"); 4125 } 4126 4127 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4128 // Emit the offloading entries and metadata so that the device codegen side 4129 // can easily figure out what to emit. The produced metadata looks like 4130 // this: 4131 // 4132 // !omp_offload.info = !{!1, ...} 4133 // 4134 // Right now we only generate metadata for function that contain target 4135 // regions. 4136 4137 // If we are in simd mode or there are no entries, we don't need to do 4138 // anything. 4139 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 4140 return; 4141 4142 llvm::Module &M = CGM.getModule(); 4143 llvm::LLVMContext &C = M.getContext(); 4144 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 4145 SourceLocation, StringRef>, 4146 16> 4147 OrderedEntries(OffloadEntriesInfoManager.size()); 4148 llvm::SmallVector<StringRef, 16> ParentFunctions( 4149 OffloadEntriesInfoManager.size()); 4150 4151 // Auxiliary methods to create metadata values and strings. 4152 auto &&GetMDInt = [this](unsigned V) { 4153 return llvm::ConstantAsMetadata::get( 4154 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4155 }; 4156 4157 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4158 4159 // Create the offloading info metadata node. 4160 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4161 4162 // Create function that emits metadata for each target region entry; 4163 auto &&TargetRegionMetadataEmitter = 4164 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 4165 &GetMDString]( 4166 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4167 unsigned Line, 4168 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4169 // Generate metadata for target regions. Each entry of this metadata 4170 // contains: 4171 // - Entry 0 -> Kind of this type of metadata (0). 
4172 // - Entry 1 -> Device ID of the file where the entry was identified. 4173 // - Entry 2 -> File ID of the file where the entry was identified. 4174 // - Entry 3 -> Mangled name of the function where the entry was 4175 // identified. 4176 // - Entry 4 -> Line in the file where the entry was identified. 4177 // - Entry 5 -> Order the entry was created. 4178 // The first element of the metadata node is the kind. 4179 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4180 GetMDInt(FileID), GetMDString(ParentName), 4181 GetMDInt(Line), GetMDInt(E.getOrder())}; 4182 4183 SourceLocation Loc; 4184 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 4185 E = CGM.getContext().getSourceManager().fileinfo_end(); 4186 I != E; ++I) { 4187 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 4188 I->getFirst()->getUniqueID().getFile() == FileID) { 4189 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 4190 I->getFirst(), Line, 1); 4191 break; 4192 } 4193 } 4194 // Save this entry in the right position of the ordered entries array. 4195 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 4196 ParentFunctions[E.getOrder()] = ParentName; 4197 4198 // Add metadata to the named metadata node. 4199 MD->addOperand(llvm::MDNode::get(C, Ops)); 4200 }; 4201 4202 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4203 TargetRegionMetadataEmitter); 4204 4205 // Create function that emits metadata for each device global variable entry; 4206 auto &&DeviceGlobalVarMetadataEmitter = 4207 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4208 MD](StringRef MangledName, 4209 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4210 &E) { 4211 // Generate metadata for global variables. Each entry of this metadata 4212 // contains: 4213 // - Entry 0 -> Kind of this type of metadata (1). 4214 // - Entry 1 -> Mangled name of the variable. 4215 // - Entry 2 -> Declare target kind. 
4216 // - Entry 3 -> Order the entry was created. 4217 // The first element of the metadata node is the kind. 4218 llvm::Metadata *Ops[] = { 4219 GetMDInt(E.getKind()), GetMDString(MangledName), 4220 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4221 4222 // Save this entry in the right position of the ordered entries array. 4223 OrderedEntries[E.getOrder()] = 4224 std::make_tuple(&E, SourceLocation(), MangledName); 4225 4226 // Add metadata to the named metadata node. 4227 MD->addOperand(llvm::MDNode::get(C, Ops)); 4228 }; 4229 4230 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4231 DeviceGlobalVarMetadataEmitter); 4232 4233 for (const auto &E : OrderedEntries) { 4234 assert(std::get<0>(E) && "All ordered entries must exist!"); 4235 if (const auto *CE = 4236 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4237 std::get<0>(E))) { 4238 if (!CE->getID() || !CE->getAddress()) { 4239 // Do not blame the entry if the parent funtion is not emitted. 4240 StringRef FnName = ParentFunctions[CE->getOrder()]; 4241 if (!CGM.GetGlobalValue(FnName)) 4242 continue; 4243 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4244 DiagnosticsEngine::Error, 4245 "Offloading entry for target region in %0 is incorrect: either the " 4246 "address or the ID is invalid."); 4247 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 4248 continue; 4249 } 4250 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4251 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4252 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 4253 OffloadEntryInfoDeviceGlobalVar>( 4254 std::get<0>(E))) { 4255 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4256 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4257 CE->getFlags()); 4258 switch (Flags) { 4259 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4260 if (CGM.getLangOpts().OpenMPIsDevice && 4261 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4262 continue; 4263 if (!CE->getAddress()) { 4264 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4265 DiagnosticsEngine::Error, "Offloading entry for declare target " 4266 "variable %0 is incorrect: the " 4267 "address is invalid."); 4268 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 4269 continue; 4270 } 4271 // The vaiable has no definition - no need to add the entry. 4272 if (CE->getVarSize().isZero()) 4273 continue; 4274 break; 4275 } 4276 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4277 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4278 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4279 "Declaret target link address is set."); 4280 if (CGM.getLangOpts().OpenMPIsDevice) 4281 continue; 4282 if (!CE->getAddress()) { 4283 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4284 DiagnosticsEngine::Error, 4285 "Offloading entry for declare target variable is incorrect: the " 4286 "address is invalid."); 4287 CGM.getDiags().Report(DiagID); 4288 continue; 4289 } 4290 break; 4291 } 4292 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4293 CE->getVarSize().getQuantity(), Flags, 4294 CE->getLinkage()); 4295 } else { 4296 llvm_unreachable("Unsupported entry kind."); 4297 } 4298 } 4299 } 4300 4301 /// Loads all the offload entries information from the host IR 4302 /// metadata. 4303 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4304 // If we are in target mode, load the metadata from the host IR. This code has 4305 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4306 4307 if (!CGM.getLangOpts().OpenMPIsDevice) 4308 return; 4309 4310 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4311 return; 4312 4313 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4314 if (auto EC = Buf.getError()) { 4315 CGM.getDiags().Report(diag::err_cannot_open_file) 4316 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4317 return; 4318 } 4319 4320 llvm::LLVMContext C; 4321 auto ME = expectedToErrorOrAndEmitErrors( 4322 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4323 4324 if (auto EC = ME.getError()) { 4325 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4326 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4327 CGM.getDiags().Report(DiagID) 4328 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4329 return; 4330 } 4331 4332 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4333 if (!MD) 4334 return; 4335 4336 for (llvm::MDNode *MN : MD->operands()) { 4337 auto &&GetMDInt = [MN](unsigned Idx) { 4338 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4339 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4340 }; 4341 4342 auto &&GetMDString = [MN](unsigned Idx) { 4343 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4344 return V->getString(); 4345 }; 4346 4347 switch (GetMDInt(0)) { 4348 default: 4349 llvm_unreachable("Unexpected metadata!"); 4350 break; 4351 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4352 OffloadingEntryInfoTargetRegion: 4353 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4354 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4355 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4356 /*Order=*/GetMDInt(5)); 4357 break; 4358 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4359 OffloadingEntryInfoDeviceGlobalVar: 4360 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4361 /*MangledName=*/GetMDString(1), 4362 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4363 /*Flags=*/GetMDInt(2)), 4364 /*Order=*/GetMDInt(3)); 4365 break; 4366 } 4367 } 4368 } 4369 4370 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4371 if (!KmpRoutineEntryPtrTy) { 4372 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4373 ASTContext &C = CGM.getContext(); 4374 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4375 FunctionProtoType::ExtProtoInfo EPI; 4376 KmpRoutineEntryPtrQTy = C.getPointerType( 4377 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4378 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4379 } 4380 } 4381 4382 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4383 // Make sure the type of the entry is already created. This is the type we 4384 // have to create: 4385 // struct __tgt_offload_entry{ 4386 // void *addr; // Pointer to the offload entry info. 4387 // // (function or global) 4388 // char *name; // Name of the function or global. 4389 // size_t size; // Size of the entry info (0 if it a function). 4390 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4391 // int32_t reserved; // Reserved, to use by the runtime library. 
4392 // }; 4393 if (TgtOffloadEntryQTy.isNull()) { 4394 ASTContext &C = CGM.getContext(); 4395 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4396 RD->startDefinition(); 4397 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4398 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4399 addFieldToRecordDecl(C, RD, C.getSizeType()); 4400 addFieldToRecordDecl( 4401 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4402 addFieldToRecordDecl( 4403 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4404 RD->completeDefinition(); 4405 RD->addAttr(PackedAttr::CreateImplicit(C)); 4406 TgtOffloadEntryQTy = C.getRecordType(RD); 4407 } 4408 return TgtOffloadEntryQTy; 4409 } 4410 4411 namespace { 4412 struct PrivateHelpersTy { 4413 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 4414 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 4415 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 4416 PrivateElemInit(PrivateElemInit) {} 4417 const Expr *OriginalRef = nullptr; 4418 const VarDecl *Original = nullptr; 4419 const VarDecl *PrivateCopy = nullptr; 4420 const VarDecl *PrivateElemInit = nullptr; 4421 }; 4422 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4423 } // anonymous namespace 4424 4425 static RecordDecl * 4426 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4427 if (!Privates.empty()) { 4428 ASTContext &C = CGM.getContext(); 4429 // Build struct .kmp_privates_t. 
{ 4430 // /* private vars */ 4431 // }; 4432 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4433 RD->startDefinition(); 4434 for (const auto &Pair : Privates) { 4435 const VarDecl *VD = Pair.second.Original; 4436 QualType Type = VD->getType().getNonReferenceType(); 4437 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4438 if (VD->hasAttrs()) { 4439 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4440 E(VD->getAttrs().end()); 4441 I != E; ++I) 4442 FD->addAttr(*I); 4443 } 4444 } 4445 RD->completeDefinition(); 4446 return RD; 4447 } 4448 return nullptr; 4449 } 4450 4451 static RecordDecl * 4452 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4453 QualType KmpInt32Ty, 4454 QualType KmpRoutineEntryPointerQTy) { 4455 ASTContext &C = CGM.getContext(); 4456 // Build struct kmp_task_t { 4457 // void * shareds; 4458 // kmp_routine_entry_t routine; 4459 // kmp_int32 part_id; 4460 // kmp_cmplrdata_t data1; 4461 // kmp_cmplrdata_t data2; 4462 // For taskloops additional fields: 4463 // kmp_uint64 lb; 4464 // kmp_uint64 ub; 4465 // kmp_int64 st; 4466 // kmp_int32 liter; 4467 // void * reductions; 4468 // }; 4469 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4470 UD->startDefinition(); 4471 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4472 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4473 UD->completeDefinition(); 4474 QualType KmpCmplrdataTy = C.getRecordType(UD); 4475 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4476 RD->startDefinition(); 4477 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4478 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4479 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4480 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4481 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4482 if (isOpenMPTaskLoopDirective(Kind)) { 4483 QualType KmpUInt64Ty = 4484 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4485 QualType KmpInt64Ty = 4486 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4487 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4488 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4489 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4490 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4491 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4492 } 4493 RD->completeDefinition(); 4494 return RD; 4495 } 4496 4497 static RecordDecl * 4498 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4499 ArrayRef<PrivateDataTy> Privates) { 4500 ASTContext &C = CGM.getContext(); 4501 // Build struct kmp_task_t_with_privates { 4502 // kmp_task_t task_data; 4503 // .kmp_privates_t. privates; 4504 // }; 4505 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4506 RD->startDefinition(); 4507 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4508 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4509 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4510 RD->completeDefinition(); 4511 return RD; 4512 } 4513 4514 /// Emit a proxy function which accepts kmp_task_t as the second 4515 /// argument. 
4516 /// \code 4517 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4518 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4519 /// For taskloops: 4520 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4521 /// tt->reductions, tt->shareds); 4522 /// return 0; 4523 /// } 4524 /// \endcode 4525 static llvm::Function * 4526 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4527 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4528 QualType KmpTaskTWithPrivatesPtrQTy, 4529 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4530 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4531 llvm::Value *TaskPrivatesMap) { 4532 ASTContext &C = CGM.getContext(); 4533 FunctionArgList Args; 4534 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4535 ImplicitParamDecl::Other); 4536 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4537 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4538 ImplicitParamDecl::Other); 4539 Args.push_back(&GtidArg); 4540 Args.push_back(&TaskTypeArg); 4541 const auto &TaskEntryFnInfo = 4542 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4543 llvm::FunctionType *TaskEntryTy = 4544 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4545 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4546 auto *TaskEntry = llvm::Function::Create( 4547 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4548 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4549 TaskEntry->setDoesNotRecurse(); 4550 CodeGenFunction CGF(CGM); 4551 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4552 Loc, Loc); 4553 4554 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4555 // tt, 4556 // For taskloops: 4557 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4558 // 
tt->task_data.shareds); 4559 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4560 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4561 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4562 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4563 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4564 const auto *KmpTaskTWithPrivatesQTyRD = 4565 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4566 LValue Base = 4567 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4568 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4569 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4570 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4571 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 4572 4573 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4574 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4575 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4576 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4577 CGF.ConvertTypeForMem(SharedsPtrTy)); 4578 4579 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4580 llvm::Value *PrivatesParam; 4581 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4582 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4583 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4584 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 4585 } else { 4586 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4587 } 4588 4589 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4590 TaskPrivatesMap, 4591 CGF.Builder 4592 .CreatePointerBitCastOrAddrSpaceCast( 4593 TDBase.getAddress(CGF), CGF.VoidPtrTy) 4594 .getPointer()}; 4595 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4596 std::end(CommonArgs)); 4597 if (isOpenMPTaskLoopDirective(Kind)) { 4598 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4599 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4600 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4601 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4602 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4603 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4604 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4605 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4606 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4607 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4608 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4609 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4610 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4611 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4612 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4613 CallArgs.push_back(LBParam); 4614 CallArgs.push_back(UBParam); 4615 CallArgs.push_back(StParam); 4616 CallArgs.push_back(LIParam); 4617 CallArgs.push_back(RParam); 4618 } 4619 CallArgs.push_back(SharedsParam); 4620 4621 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4622 CallArgs); 4623 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4624 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4625 CGF.FinishFunction(); 4626 return TaskEntry; 4627 } 4628 4629 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4630 SourceLocation Loc, 4631 QualType KmpInt32Ty, 4632 QualType KmpTaskTWithPrivatesPtrQTy, 4633 QualType KmpTaskTWithPrivatesQTy) { 4634 ASTContext &C = CGM.getContext(); 4635 FunctionArgList Args; 4636 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4637 ImplicitParamDecl::Other); 4638 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4639 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4640 
ImplicitParamDecl::Other); 4641 Args.push_back(&GtidArg); 4642 Args.push_back(&TaskTypeArg); 4643 const auto &DestructorFnInfo = 4644 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4645 llvm::FunctionType *DestructorFnTy = 4646 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4647 std::string Name = 4648 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4649 auto *DestructorFn = 4650 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4651 Name, &CGM.getModule()); 4652 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4653 DestructorFnInfo); 4654 DestructorFn->setDoesNotRecurse(); 4655 CodeGenFunction CGF(CGM); 4656 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4657 Args, Loc, Loc); 4658 4659 LValue Base = CGF.EmitLoadOfPointerLValue( 4660 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4661 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4662 const auto *KmpTaskTWithPrivatesQTyRD = 4663 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4664 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4665 Base = CGF.EmitLValueForField(Base, *FI); 4666 for (const auto *Field : 4667 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4668 if (QualType::DestructionKind DtorKind = 4669 Field->getType().isDestructedType()) { 4670 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4671 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 4672 } 4673 } 4674 CGF.FinishFunction(); 4675 return DestructorFn; 4676 } 4677 4678 /// Emit a privates mapping function for correct handling of private and 4679 /// firstprivate variables. 4680 /// \code 4681 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4682 /// **noalias priv1,..., <tyn> **noalias privn) { 4683 /// *priv1 = &.privates.priv1; 4684 /// ...; 4685 /// *privn = &.privates.privn; 4686 /// } 4687 /// \endcode 4688 static llvm::Value * 4689 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4690 ArrayRef<const Expr *> PrivateVars, 4691 ArrayRef<const Expr *> FirstprivateVars, 4692 ArrayRef<const Expr *> LastprivateVars, 4693 QualType PrivatesQTy, 4694 ArrayRef<PrivateDataTy> Privates) { 4695 ASTContext &C = CGM.getContext(); 4696 FunctionArgList Args; 4697 ImplicitParamDecl TaskPrivatesArg( 4698 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4699 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4700 ImplicitParamDecl::Other); 4701 Args.push_back(&TaskPrivatesArg); 4702 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4703 unsigned Counter = 1; 4704 for (const Expr *E : PrivateVars) { 4705 Args.push_back(ImplicitParamDecl::Create( 4706 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4707 C.getPointerType(C.getPointerType(E->getType())) 4708 .withConst() 4709 .withRestrict(), 4710 ImplicitParamDecl::Other)); 4711 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4712 PrivateVarsPos[VD] = Counter; 4713 ++Counter; 4714 } 4715 for (const Expr *E : FirstprivateVars) { 4716 Args.push_back(ImplicitParamDecl::Create( 4717 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4718 C.getPointerType(C.getPointerType(E->getType())) 4719 .withConst() 4720 .withRestrict(), 4721 ImplicitParamDecl::Other)); 4722 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4723 PrivateVarsPos[VD] = Counter; 4724 ++Counter; 4725 } 4726 for (const Expr *E : LastprivateVars) { 4727 Args.push_back(ImplicitParamDecl::Create( 4728 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4729 C.getPointerType(C.getPointerType(E->getType())) 4730 .withConst() 4731 .withRestrict(), 4732 ImplicitParamDecl::Other)); 4733 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4734 
PrivateVarsPos[VD] = Counter; 4735 ++Counter; 4736 } 4737 const auto &TaskPrivatesMapFnInfo = 4738 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4739 llvm::FunctionType *TaskPrivatesMapTy = 4740 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4741 std::string Name = 4742 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4743 auto *TaskPrivatesMap = llvm::Function::Create( 4744 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4745 &CGM.getModule()); 4746 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4747 TaskPrivatesMapFnInfo); 4748 if (CGM.getLangOpts().Optimize) { 4749 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4750 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4751 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4752 } 4753 CodeGenFunction CGF(CGM); 4754 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4755 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4756 4757 // *privi = &.privates.privi; 4758 LValue Base = CGF.EmitLoadOfPointerLValue( 4759 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4760 TaskPrivatesArg.getType()->castAs<PointerType>()); 4761 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4762 Counter = 0; 4763 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4764 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4765 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4766 LValue RefLVal = 4767 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4768 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4769 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 4770 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 4771 ++Counter; 4772 } 4773 CGF.FinishFunction(); 4774 return TaskPrivatesMap; 4775 } 4776 4777 /// Emit initialization for private variables in task-based directives. 
4778 static void emitPrivatesInit(CodeGenFunction &CGF, 4779 const OMPExecutableDirective &D, 4780 Address KmpTaskSharedsPtr, LValue TDBase, 4781 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4782 QualType SharedsTy, QualType SharedsPtrTy, 4783 const OMPTaskDataTy &Data, 4784 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4785 ASTContext &C = CGF.getContext(); 4786 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4787 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4788 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4789 ? OMPD_taskloop 4790 : OMPD_task; 4791 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4792 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4793 LValue SrcBase; 4794 bool IsTargetTask = 4795 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4796 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4797 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4798 // PointersArray and SizesArray. The original variables for these arrays are 4799 // not captured and we get their addresses explicitly. 
4800 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 4801 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4802 SrcBase = CGF.MakeAddrLValue( 4803 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4804 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4805 SharedsTy); 4806 } 4807 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4808 for (const PrivateDataTy &Pair : Privates) { 4809 const VarDecl *VD = Pair.second.PrivateCopy; 4810 const Expr *Init = VD->getAnyInitializer(); 4811 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4812 !CGF.isTrivialInitializer(Init)))) { 4813 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4814 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4815 const VarDecl *OriginalVD = Pair.second.Original; 4816 // Check if the variable is the target-based BasePointersArray, 4817 // PointersArray or SizesArray. 4818 LValue SharedRefLValue; 4819 QualType Type = PrivateLValue.getType(); 4820 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4821 if (IsTargetTask && !SharedField) { 4822 assert(isa<ImplicitParamDecl>(OriginalVD) && 4823 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4824 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4825 ->getNumParams() == 0 && 4826 isa<TranslationUnitDecl>( 4827 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4828 ->getDeclContext()) && 4829 "Expected artificial target data variable."); 4830 SharedRefLValue = 4831 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4832 } else if (ForDup) { 4833 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4834 SharedRefLValue = CGF.MakeAddrLValue( 4835 Address(SharedRefLValue.getPointer(CGF), 4836 C.getDeclAlign(OriginalVD)), 4837 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4838 SharedRefLValue.getTBAAInfo()); 4839 } else if (CGF.LambdaCaptureFields.count( 4840 Pair.second.Original->getCanonicalDecl()) > 0 || 4841 
dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { 4842 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 4843 } else { 4844 // Processing for implicitly captured variables. 4845 InlinedOpenMPRegionRAII Region( 4846 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 4847 /*HasCancel=*/false); 4848 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 4849 } 4850 if (Type->isArrayType()) { 4851 // Initialize firstprivate array. 4852 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4853 // Perform simple memcpy. 4854 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4855 } else { 4856 // Initialize firstprivate array using element-by-element 4857 // initialization. 4858 CGF.EmitOMPAggregateAssign( 4859 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 4860 Type, 4861 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4862 Address SrcElement) { 4863 // Clean up any temporaries needed by the initialization. 4864 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4865 InitScope.addPrivate( 4866 Elem, [SrcElement]() -> Address { return SrcElement; }); 4867 (void)InitScope.Privatize(); 4868 // Emit initialization for single element. 
4869 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4870 CGF, &CapturesInfo); 4871 CGF.EmitAnyExprToMem(Init, DestElement, 4872 Init->getType().getQualifiers(), 4873 /*IsInitializer=*/false); 4874 }); 4875 } 4876 } else { 4877 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4878 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 4879 return SharedRefLValue.getAddress(CGF); 4880 }); 4881 (void)InitScope.Privatize(); 4882 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4883 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4884 /*capturedByInit=*/false); 4885 } 4886 } else { 4887 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4888 } 4889 } 4890 ++FI; 4891 } 4892 } 4893 4894 /// Check if duplication function is required for taskloops. 4895 static bool checkInitIsRequired(CodeGenFunction &CGF, 4896 ArrayRef<PrivateDataTy> Privates) { 4897 bool InitRequired = false; 4898 for (const PrivateDataTy &Pair : Privates) { 4899 const VarDecl *VD = Pair.second.PrivateCopy; 4900 const Expr *Init = VD->getAnyInitializer(); 4901 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4902 !CGF.isTrivialInitializer(Init)); 4903 if (InitRequired) 4904 break; 4905 } 4906 return InitRequired; 4907 } 4908 4909 4910 /// Emit task_dup function (for initialization of 4911 /// private/firstprivate/lastprivate vars and last_iter flag) 4912 /// \code 4913 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4914 /// lastpriv) { 4915 /// // setup lastprivate flag 4916 /// task_dst->last = lastpriv; 4917 /// // could be constructor calls here... 
4918 /// } 4919 /// \endcode 4920 static llvm::Value * 4921 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4922 const OMPExecutableDirective &D, 4923 QualType KmpTaskTWithPrivatesPtrQTy, 4924 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4925 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4926 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4927 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4928 ASTContext &C = CGM.getContext(); 4929 FunctionArgList Args; 4930 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4931 KmpTaskTWithPrivatesPtrQTy, 4932 ImplicitParamDecl::Other); 4933 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4934 KmpTaskTWithPrivatesPtrQTy, 4935 ImplicitParamDecl::Other); 4936 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4937 ImplicitParamDecl::Other); 4938 Args.push_back(&DstArg); 4939 Args.push_back(&SrcArg); 4940 Args.push_back(&LastprivArg); 4941 const auto &TaskDupFnInfo = 4942 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4943 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4944 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4945 auto *TaskDup = llvm::Function::Create( 4946 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4947 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4948 TaskDup->setDoesNotRecurse(); 4949 CodeGenFunction CGF(CGM); 4950 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4951 Loc); 4952 4953 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4954 CGF.GetAddrOfLocalVar(&DstArg), 4955 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4956 // task_dst->liter = lastpriv; 4957 if (WithLastIter) { 4958 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4959 LValue Base = CGF.EmitLValueForField( 4960 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4961 LValue LILVal 
= CGF.EmitLValueForField(Base, *LIFI); 4962 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4963 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4964 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4965 } 4966 4967 // Emit initial values for private copies (if any). 4968 assert(!Privates.empty()); 4969 Address KmpTaskSharedsPtr = Address::invalid(); 4970 if (!Data.FirstprivateVars.empty()) { 4971 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4972 CGF.GetAddrOfLocalVar(&SrcArg), 4973 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4974 LValue Base = CGF.EmitLValueForField( 4975 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4976 KmpTaskSharedsPtr = Address( 4977 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4978 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4979 KmpTaskTShareds)), 4980 Loc), 4981 CGM.getNaturalTypeAlignment(SharedsTy)); 4982 } 4983 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4984 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4985 CGF.FinishFunction(); 4986 return TaskDup; 4987 } 4988 4989 /// Checks if destructor function is required to be generated. 4990 /// \return true if cleanups are required, false otherwise. 
4991 static bool 4992 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4993 bool NeedsCleanup = false; 4994 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4995 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4996 for (const FieldDecl *FD : PrivateRD->fields()) { 4997 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4998 if (NeedsCleanup) 4999 break; 5000 } 5001 return NeedsCleanup; 5002 } 5003 5004 CGOpenMPRuntime::TaskResultTy 5005 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 5006 const OMPExecutableDirective &D, 5007 llvm::Function *TaskFunction, QualType SharedsTy, 5008 Address Shareds, const OMPTaskDataTy &Data) { 5009 ASTContext &C = CGM.getContext(); 5010 llvm::SmallVector<PrivateDataTy, 4> Privates; 5011 // Aggregate privates and sort them by the alignment. 5012 const auto *I = Data.PrivateCopies.begin(); 5013 for (const Expr *E : Data.PrivateVars) { 5014 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5015 Privates.emplace_back( 5016 C.getDeclAlign(VD), 5017 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5018 /*PrivateElemInit=*/nullptr)); 5019 ++I; 5020 } 5021 I = Data.FirstprivateCopies.begin(); 5022 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 5023 for (const Expr *E : Data.FirstprivateVars) { 5024 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5025 Privates.emplace_back( 5026 C.getDeclAlign(VD), 5027 PrivateHelpersTy( 5028 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5029 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 5030 ++I; 5031 ++IElemInitRef; 5032 } 5033 I = Data.LastprivateCopies.begin(); 5034 for (const Expr *E : Data.LastprivateVars) { 5035 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5036 Privates.emplace_back( 5037 C.getDeclAlign(VD), 5038 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5039 
/*PrivateElemInit=*/nullptr)); 5040 ++I; 5041 } 5042 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 5043 return L.first > R.first; 5044 }); 5045 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 5046 // Build type kmp_routine_entry_t (if not built yet). 5047 emitKmpRoutineEntryT(KmpInt32Ty); 5048 // Build type kmp_task_t (if not built yet). 5049 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 5050 if (SavedKmpTaskloopTQTy.isNull()) { 5051 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5052 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5053 } 5054 KmpTaskTQTy = SavedKmpTaskloopTQTy; 5055 } else { 5056 assert((D.getDirectiveKind() == OMPD_task || 5057 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 5058 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 5059 "Expected taskloop, task or target directive"); 5060 if (SavedKmpTaskTQTy.isNull()) { 5061 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5062 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5063 } 5064 KmpTaskTQTy = SavedKmpTaskTQTy; 5065 } 5066 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5067 // Build particular struct kmp_task_t for the given task. 5068 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5069 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5070 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5071 QualType KmpTaskTWithPrivatesPtrQTy = 5072 C.getPointerType(KmpTaskTWithPrivatesQTy); 5073 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5074 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5075 KmpTaskTWithPrivatesTy->getPointerTo(); 5076 llvm::Value *KmpTaskTWithPrivatesTySize = 5077 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5078 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5079 5080 // Emit initial values for private copies (if any). 
5081 llvm::Value *TaskPrivatesMap = nullptr; 5082 llvm::Type *TaskPrivatesMapTy = 5083 std::next(TaskFunction->arg_begin(), 3)->getType(); 5084 if (!Privates.empty()) { 5085 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5086 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5087 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5088 FI->getType(), Privates); 5089 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5090 TaskPrivatesMap, TaskPrivatesMapTy); 5091 } else { 5092 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5093 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5094 } 5095 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5096 // kmp_task_t *tt); 5097 llvm::Function *TaskEntry = emitProxyTaskFunction( 5098 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5099 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5100 TaskPrivatesMap); 5101 5102 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5103 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5104 // kmp_routine_entry_t *task_entry); 5105 // Task flags. Format is taken from 5106 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5107 // description of kmp_tasking_flags struct. 5108 enum { 5109 TiedFlag = 0x1, 5110 FinalFlag = 0x2, 5111 DestructorsFlag = 0x8, 5112 PriorityFlag = 0x20, 5113 DetachableFlag = 0x40, 5114 }; 5115 unsigned Flags = Data.Tied ? TiedFlag : 0; 5116 bool NeedsCleanup = false; 5117 if (!Privates.empty()) { 5118 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5119 if (NeedsCleanup) 5120 Flags = Flags | DestructorsFlag; 5121 } 5122 if (Data.Priority.getInt()) 5123 Flags = Flags | PriorityFlag; 5124 if (D.hasClausesOfKind<OMPDetachClause>()) 5125 Flags = Flags | DetachableFlag; 5126 llvm::Value *TaskFlags = 5127 Data.Final.getPointer() 5128 ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), 5129 CGF.Builder.getInt32(FinalFlag), 5130 CGF.Builder.getInt32(/*C=*/0)) 5131 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5132 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5133 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5134 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 5135 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 5136 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5137 TaskEntry, KmpRoutineEntryPtrTy)}; 5138 llvm::Value *NewTask; 5139 if (D.hasClausesOfKind<OMPNowaitClause>()) { 5140 // Check if we have any device clause associated with the directive. 5141 const Expr *Device = nullptr; 5142 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 5143 Device = C->getDevice(); 5144 // Emit device ID if any otherwise use default value. 5145 llvm::Value *DeviceID; 5146 if (Device) 5147 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5148 CGF.Int64Ty, /*isSigned=*/true); 5149 else 5150 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 5151 AllocArgs.push_back(DeviceID); 5152 NewTask = CGF.EmitRuntimeCall( 5153 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); 5154 } else { 5155 NewTask = CGF.EmitRuntimeCall( 5156 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5157 } 5158 // Emit detach clause initialization. 
5159 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 5160 // task_descriptor); 5161 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 5162 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 5163 LValue EvtLVal = CGF.EmitLValue(Evt); 5164 5165 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 5166 // int gtid, kmp_task_t *task); 5167 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 5168 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 5169 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 5170 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 5171 createRuntimeFunction(OMPRTL__kmpc_task_allow_completion_event), 5172 {Loc, Tid, NewTask}); 5173 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 5174 Evt->getExprLoc()); 5175 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 5176 } 5177 llvm::Value *NewTaskNewTaskTTy = 5178 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5179 NewTask, KmpTaskTWithPrivatesPtrTy); 5180 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5181 KmpTaskTWithPrivatesQTy); 5182 LValue TDBase = 5183 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5184 // Fill the data in the resulting kmp_task_t record. 5185 // Copy shareds if there are any. 5186 Address KmpTaskSharedsPtr = Address::invalid(); 5187 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5188 KmpTaskSharedsPtr = 5189 Address(CGF.EmitLoadOfScalar( 5190 CGF.EmitLValueForField( 5191 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5192 KmpTaskTShareds)), 5193 Loc), 5194 CGM.getNaturalTypeAlignment(SharedsTy)); 5195 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5196 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5197 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5198 } 5199 // Emit initial values for private copies (if any). 
5200 TaskResultTy Result; 5201 if (!Privates.empty()) { 5202 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5203 SharedsTy, SharedsPtrTy, Data, Privates, 5204 /*ForDup=*/false); 5205 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5206 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5207 Result.TaskDupFn = emitTaskDupFunction( 5208 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5209 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5210 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5211 } 5212 } 5213 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5214 enum { Priority = 0, Destructors = 1 }; 5215 // Provide pointer to function with destructors for privates. 5216 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5217 const RecordDecl *KmpCmplrdataUD = 5218 (*FI)->getType()->getAsUnionType()->getDecl(); 5219 if (NeedsCleanup) { 5220 llvm::Value *DestructorFn = emitDestructorsFunction( 5221 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5222 KmpTaskTWithPrivatesQTy); 5223 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5224 LValue DestructorsLV = CGF.EmitLValueForField( 5225 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5226 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5227 DestructorFn, KmpRoutineEntryPtrTy), 5228 DestructorsLV); 5229 } 5230 // Set priority. 
5231 if (Data.Priority.getInt()) { 5232 LValue Data2LV = CGF.EmitLValueForField( 5233 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5234 LValue PriorityLV = CGF.EmitLValueForField( 5235 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5236 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5237 } 5238 Result.NewTask = NewTask; 5239 Result.TaskEntry = TaskEntry; 5240 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5241 Result.TDBase = TDBase; 5242 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5243 return Result; 5244 } 5245 5246 namespace { 5247 /// Dependence kind for RTL. 5248 enum RTLDependenceKindTy { 5249 DepIn = 0x01, 5250 DepInOut = 0x3, 5251 DepMutexInOutSet = 0x4 5252 }; 5253 /// Fields ids in kmp_depend_info record. 5254 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5255 } // namespace 5256 5257 /// Translates internal dependency kind into the runtime kind. 5258 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 5259 RTLDependenceKindTy DepKind; 5260 switch (K) { 5261 case OMPC_DEPEND_in: 5262 DepKind = DepIn; 5263 break; 5264 // Out and InOut dependencies must use the same code. 5265 case OMPC_DEPEND_out: 5266 case OMPC_DEPEND_inout: 5267 DepKind = DepInOut; 5268 break; 5269 case OMPC_DEPEND_mutexinoutset: 5270 DepKind = DepMutexInOutSet; 5271 break; 5272 case OMPC_DEPEND_source: 5273 case OMPC_DEPEND_sink: 5274 case OMPC_DEPEND_depobj: 5275 case OMPC_DEPEND_unknown: 5276 llvm_unreachable("Unknown task dependence type"); 5277 } 5278 return DepKind; 5279 } 5280 5281 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
5282 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 5283 QualType &FlagsTy) { 5284 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5285 if (KmpDependInfoTy.isNull()) { 5286 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5287 KmpDependInfoRD->startDefinition(); 5288 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5289 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5290 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5291 KmpDependInfoRD->completeDefinition(); 5292 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5293 } 5294 } 5295 5296 std::pair<llvm::Value *, LValue> 5297 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 5298 SourceLocation Loc) { 5299 ASTContext &C = CGM.getContext(); 5300 QualType FlagsTy; 5301 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5302 RecordDecl *KmpDependInfoRD = 5303 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5304 LValue Base = CGF.EmitLoadOfPointerLValue( 5305 DepobjLVal.getAddress(CGF), 5306 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5307 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5308 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5309 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5310 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 5311 Base.getTBAAInfo()); 5312 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5313 Addr.getPointer(), 5314 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5315 LValue NumDepsBase = CGF.MakeAddrLValue( 5316 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 5317 Base.getBaseInfo(), Base.getTBAAInfo()); 5318 // NumDeps = deps[i].base_addr; 5319 LValue BaseAddrLVal = CGF.EmitLValueForField( 5320 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5321 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 5322 return 
std::make_pair(NumDeps, Base); 5323 } 5324 5325 namespace { 5326 /// Loop generator for OpenMP iterator expression. 5327 class OMPIteratorGeneratorScope final 5328 : public CodeGenFunction::OMPPrivateScope { 5329 CodeGenFunction &CGF; 5330 const OMPIteratorExpr *E = nullptr; 5331 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 5332 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 5333 OMPIteratorGeneratorScope() = delete; 5334 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 5335 5336 public: 5337 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 5338 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 5339 if (!E) 5340 return; 5341 SmallVector<llvm::Value *, 4> Uppers; 5342 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 5343 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 5344 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 5345 addPrivate(VD, [&CGF, VD]() { 5346 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 5347 }); 5348 const OMPIteratorHelperData &HelperData = E->getHelper(I); 5349 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 5350 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 5351 "counter.addr"); 5352 }); 5353 } 5354 Privatize(); 5355 5356 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 5357 const OMPIteratorHelperData &HelperData = E->getHelper(I); 5358 LValue CLVal = 5359 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 5360 HelperData.CounterVD->getType()); 5361 // Counter = 0; 5362 CGF.EmitStoreOfScalar( 5363 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 5364 CLVal); 5365 CodeGenFunction::JumpDest &ContDest = 5366 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 5367 CodeGenFunction::JumpDest &ExitDest = 5368 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 5369 // N = <number-of_iterations>; 5370 llvm::Value *N = Uppers[I]; 5371 // cont: 5372 
// if (Counter < N) goto body; else goto exit; 5373 CGF.EmitBlock(ContDest.getBlock()); 5374 auto *CVal = 5375 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 5376 llvm::Value *Cmp = 5377 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 5378 ? CGF.Builder.CreateICmpSLT(CVal, N) 5379 : CGF.Builder.CreateICmpULT(CVal, N); 5380 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 5381 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 5382 // body: 5383 CGF.EmitBlock(BodyBB); 5384 // Iteri = Begini + Counter * Stepi; 5385 CGF.EmitIgnoredExpr(HelperData.Update); 5386 } 5387 } 5388 ~OMPIteratorGeneratorScope() { 5389 if (!E) 5390 return; 5391 for (unsigned I = E->numOfIterators(); I > 0; --I) { 5392 // Counter = Counter + 1; 5393 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 5394 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 5395 // goto cont; 5396 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 5397 // exit: 5398 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 5399 } 5400 } 5401 }; 5402 } // namespace 5403 5404 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 5405 llvm::PointerUnion<unsigned *, LValue *> Pos, 5406 const OMPTaskDataTy::DependData &Data, 5407 Address DependenciesArray) { 5408 CodeGenModule &CGM = CGF.CGM; 5409 ASTContext &C = CGM.getContext(); 5410 QualType FlagsTy; 5411 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5412 RecordDecl *KmpDependInfoRD = 5413 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5414 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5415 5416 OMPIteratorGeneratorScope IteratorScope( 5417 CGF, cast_or_null<OMPIteratorExpr>( 5418 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 5419 : nullptr)); 5420 for (const Expr *E : Data.DepExprs) { 5421 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 5422 llvm::Value *Addr; 5423 if (OASE) { 5424 const Expr *Base = OASE->getBase(); 5425 Addr = CGF.EmitScalarExpr(Base); 5426 } else { 5427 Addr = CGF.EmitLValue(E).getPointer(CGF); 5428 } 5429 llvm::Value *Size; 5430 QualType Ty = E->getType(); 5431 if (OASE) { 5432 Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 5433 for (const Expr *SE : OASE->getDimensions()) { 5434 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 5435 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 5436 CGF.getContext().getSizeType(), 5437 SE->getExprLoc()); 5438 Size = CGF.Builder.CreateNUWMul(Size, Sz); 5439 } 5440 } else if (const auto *ASE = 5441 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5442 LValue UpAddrLVal = 5443 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 5444 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 5445 UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 5446 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy); 5447 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5448 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5449 } else { 5450 Size = CGF.getTypeSize(Ty); 5451 } 5452 LValue Base; 5453 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 5454 Base = CGF.MakeAddrLValue( 5455 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 5456 } else { 5457 LValue &PosLVal = *Pos.get<LValue *>(); 5458 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 5459 Base = CGF.MakeAddrLValue( 5460 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), 5461 DependenciesArray.getAlignment()), 5462 KmpDependInfoTy); 5463 } 5464 // deps[i].base_addr = &<Dependencies[i].second>; 5465 LValue BaseAddrLVal = CGF.EmitLValueForField( 5466 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5467 
CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 5468 BaseAddrLVal); 5469 // deps[i].len = sizeof(<Dependencies[i].second>); 5470 LValue LenLVal = CGF.EmitLValueForField( 5471 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5472 CGF.EmitStoreOfScalar(Size, LenLVal); 5473 // deps[i].flags = <Dependencies[i].first>; 5474 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 5475 LValue FlagsLVal = CGF.EmitLValueForField( 5476 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5477 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5478 FlagsLVal); 5479 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 5480 ++(*P); 5481 } else { 5482 LValue &PosLVal = *Pos.get<LValue *>(); 5483 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 5484 Idx = CGF.Builder.CreateNUWAdd(Idx, 5485 llvm::ConstantInt::get(Idx->getType(), 1)); 5486 CGF.EmitStoreOfScalar(Idx, PosLVal); 5487 } 5488 } 5489 } 5490 5491 static SmallVector<llvm::Value *, 4> 5492 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 5493 const OMPTaskDataTy::DependData &Data) { 5494 assert(Data.DepKind == OMPC_DEPEND_depobj && 5495 "Expected depobj dependecy kind."); 5496 SmallVector<llvm::Value *, 4> Sizes; 5497 SmallVector<LValue, 4> SizeLVals; 5498 ASTContext &C = CGF.getContext(); 5499 QualType FlagsTy; 5500 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5501 RecordDecl *KmpDependInfoRD = 5502 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5503 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5504 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 5505 { 5506 OMPIteratorGeneratorScope IteratorScope( 5507 CGF, cast_or_null<OMPIteratorExpr>( 5508 Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() 5509 : nullptr)); 5510 for (const Expr *E : Data.DepExprs) { 5511 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 5512 LValue Base = CGF.EmitLoadOfPointerLValue( 5513 DepobjLVal.getAddress(CGF), 5514 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5515 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5516 Base.getAddress(CGF), KmpDependInfoPtrT); 5517 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 5518 Base.getTBAAInfo()); 5519 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5520 Addr.getPointer(), 5521 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5522 LValue NumDepsBase = CGF.MakeAddrLValue( 5523 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 5524 Base.getBaseInfo(), Base.getTBAAInfo()); 5525 // NumDeps = deps[i].base_addr; 5526 LValue BaseAddrLVal = CGF.EmitLValueForField( 5527 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5528 llvm::Value *NumDeps = 5529 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 5530 LValue NumLVal = CGF.MakeAddrLValue( 5531 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 5532 C.getUIntPtrType()); 5533 CGF.InitTempAlloca(NumLVal.getAddress(CGF), 5534 llvm::ConstantInt::get(CGF.IntPtrTy, 0)); 5535 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 5536 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 5537 CGF.EmitStoreOfScalar(Add, NumLVal); 5538 SizeLVals.push_back(NumLVal); 5539 } 5540 } 5541 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 5542 llvm::Value *Size = 5543 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 5544 Sizes.push_back(Size); 5545 } 5546 return Sizes; 5547 } 5548 5549 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 5550 LValue PosLVal, 5551 const OMPTaskDataTy::DependData &Data, 5552 Address DependenciesArray) { 5553 assert(Data.DepKind == OMPC_DEPEND_depobj && 
5554 "Expected depobj dependecy kind."); 5555 ASTContext &C = CGF.getContext(); 5556 QualType FlagsTy; 5557 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5558 RecordDecl *KmpDependInfoRD = 5559 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5560 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5561 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 5562 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 5563 { 5564 OMPIteratorGeneratorScope IteratorScope( 5565 CGF, cast_or_null<OMPIteratorExpr>( 5566 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 5567 : nullptr)); 5568 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 5569 const Expr *E = Data.DepExprs[I]; 5570 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 5571 LValue Base = CGF.EmitLoadOfPointerLValue( 5572 DepobjLVal.getAddress(CGF), 5573 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5574 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5575 Base.getAddress(CGF), KmpDependInfoPtrT); 5576 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 5577 Base.getTBAAInfo()); 5578 5579 // Get number of elements in a single depobj. 5580 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5581 Addr.getPointer(), 5582 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5583 LValue NumDepsBase = CGF.MakeAddrLValue( 5584 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 5585 Base.getBaseInfo(), Base.getTBAAInfo()); 5586 // NumDeps = deps[i].base_addr; 5587 LValue BaseAddrLVal = CGF.EmitLValueForField( 5588 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5589 llvm::Value *NumDeps = 5590 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 5591 5592 // memcopy dependency data. 
5593 llvm::Value *Size = CGF.Builder.CreateNUWMul( 5594 ElSize, 5595 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 5596 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 5597 Address DepAddr = 5598 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 5599 DependenciesArray.getAlignment()); 5600 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 5601 5602 // Increase pos. 5603 // pos += size; 5604 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 5605 CGF.EmitStoreOfScalar(Add, PosLVal); 5606 } 5607 } 5608 } 5609 5610 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 5611 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 5612 SourceLocation Loc) { 5613 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 5614 return D.DepExprs.empty(); 5615 })) 5616 return std::make_pair(nullptr, Address::invalid()); 5617 // Process list of dependencies. 5618 ASTContext &C = CGM.getContext(); 5619 Address DependenciesArray = Address::invalid(); 5620 llvm::Value *NumOfElements = nullptr; 5621 unsigned NumDependencies = std::accumulate( 5622 Dependencies.begin(), Dependencies.end(), 0, 5623 [](unsigned V, const OMPTaskDataTy::DependData &D) { 5624 return D.DepKind == OMPC_DEPEND_depobj 5625 ? V 5626 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); 5627 }); 5628 QualType FlagsTy; 5629 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5630 bool HasDepobjDeps = false; 5631 bool HasRegularWithIterators = false; 5632 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 5633 llvm::Value *NumOfRegularWithIterators = 5634 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 5635 // Calculate number of depobj dependecies and regular deps with the iterators. 
5636 for (const OMPTaskDataTy::DependData &D : Dependencies) { 5637 if (D.DepKind == OMPC_DEPEND_depobj) { 5638 SmallVector<llvm::Value *, 4> Sizes = 5639 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 5640 for (llvm::Value *Size : Sizes) { 5641 NumOfDepobjElements = 5642 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 5643 } 5644 HasDepobjDeps = true; 5645 continue; 5646 } 5647 // Include number of iterations, if any. 5648 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 5649 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5650 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5651 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 5652 NumOfRegularWithIterators = 5653 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 5654 } 5655 HasRegularWithIterators = true; 5656 continue; 5657 } 5658 } 5659 5660 QualType KmpDependInfoArrayTy; 5661 if (HasDepobjDeps || HasRegularWithIterators) { 5662 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 5663 /*isSigned=*/false); 5664 if (HasDepobjDeps) { 5665 NumOfElements = 5666 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 5667 } 5668 if (HasRegularWithIterators) { 5669 NumOfElements = 5670 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 5671 } 5672 OpaqueValueExpr OVE(Loc, 5673 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 5674 VK_RValue); 5675 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 5676 RValue::get(NumOfElements)); 5677 KmpDependInfoArrayTy = 5678 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 5679 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 5680 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 5681 // Properly emit variable-sized array. 
5682 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 5683 ImplicitParamDecl::Other); 5684 CGF.EmitVarDecl(*PD); 5685 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 5686 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 5687 /*isSigned=*/false); 5688 } else { 5689 KmpDependInfoArrayTy = C.getConstantArrayType( 5690 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 5691 ArrayType::Normal, /*IndexTypeQuals=*/0); 5692 DependenciesArray = 5693 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5694 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 5695 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 5696 /*isSigned=*/false); 5697 } 5698 unsigned Pos = 0; 5699 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 5700 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 5701 Dependencies[I].IteratorExpr) 5702 continue; 5703 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 5704 DependenciesArray); 5705 } 5706 // Copy regular dependecies with iterators. 5707 LValue PosLVal = CGF.MakeAddrLValue( 5708 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 5709 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 5710 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 5711 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 5712 !Dependencies[I].IteratorExpr) 5713 continue; 5714 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 5715 DependenciesArray); 5716 } 5717 // Copy final depobj arrays without iterators. 
5718 if (HasDepobjDeps) { 5719 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 5720 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 5721 continue; 5722 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 5723 DependenciesArray); 5724 } 5725 } 5726 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5727 DependenciesArray, CGF.VoidPtrTy); 5728 return std::make_pair(NumOfElements, DependenciesArray); 5729 } 5730 5731 Address CGOpenMPRuntime::emitDepobjDependClause( 5732 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 5733 SourceLocation Loc) { 5734 if (Dependencies.DepExprs.empty()) 5735 return Address::invalid(); 5736 // Process list of dependencies. 5737 ASTContext &C = CGM.getContext(); 5738 Address DependenciesArray = Address::invalid(); 5739 unsigned NumDependencies = Dependencies.DepExprs.size(); 5740 QualType FlagsTy; 5741 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5742 RecordDecl *KmpDependInfoRD = 5743 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5744 5745 llvm::Value *Size; 5746 // Define type kmp_depend_info[<Dependencies.size()>]; 5747 // For depobj reserve one extra element to store the number of elements. 5748 // It is required to handle depobj(x) update(in) construct. 
5749 // kmp_depend_info[<Dependencies.size()>] deps; 5750 llvm::Value *NumDepsVal; 5751 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 5752 if (const auto *IE = 5753 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 5754 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 5755 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5756 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5757 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5758 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5759 } 5760 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5761 NumDepsVal); 5762 CharUnits SizeInBytes = 5763 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5764 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5765 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5766 NumDepsVal = 5767 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5768 } else { 5769 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5770 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5771 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5772 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5773 Size = CGM.getSize(Sz.alignTo(Align)); 5774 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5775 } 5776 // Need to allocate on the dynamic memory. 5777 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5778 // Use default allocator. 5779 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5780 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5781 5782 llvm::Value *Addr = CGF.EmitRuntimeCall( 5783 createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr"); 5784 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5785 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5786 DependenciesArray = Address(Addr, Align); 5787 // Write number of elements in the first element of array for depobj. 
5788 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5789 // deps[i].base_addr = NumDependencies; 5790 LValue BaseAddrLVal = CGF.EmitLValueForField( 5791 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5792 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5793 llvm::PointerUnion<unsigned *, LValue *> Pos; 5794 unsigned Idx = 1; 5795 LValue PosLVal; 5796 if (Dependencies.IteratorExpr) { 5797 PosLVal = CGF.MakeAddrLValue( 5798 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5799 C.getSizeType()); 5800 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5801 /*IsInit=*/true); 5802 Pos = &PosLVal; 5803 } else { 5804 Pos = &Idx; 5805 } 5806 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5807 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5808 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5809 return DependenciesArray; 5810 } 5811 5812 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5813 SourceLocation Loc) { 5814 ASTContext &C = CGM.getContext(); 5815 QualType FlagsTy; 5816 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5817 LValue Base = CGF.EmitLoadOfPointerLValue( 5818 DepobjLVal.getAddress(CGF), 5819 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5820 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5821 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5822 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5823 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5824 Addr.getPointer(), 5825 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5826 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5827 CGF.VoidPtrTy); 5828 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5829 // Use default allocator. 
5830 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5831 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5832 5833 // _kmpc_free(gtid, addr, nullptr); 5834 (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args); 5835 } 5836 5837 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5838 OpenMPDependClauseKind NewDepKind, 5839 SourceLocation Loc) { 5840 ASTContext &C = CGM.getContext(); 5841 QualType FlagsTy; 5842 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5843 RecordDecl *KmpDependInfoRD = 5844 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5845 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5846 llvm::Value *NumDeps; 5847 LValue Base; 5848 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5849 5850 Address Begin = Base.getAddress(CGF); 5851 // Cast from pointer to array type to pointer to single element. 5852 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5853 // The basic structure here is a while-do loop. 5854 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5855 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5856 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5857 CGF.EmitBlock(BodyBB); 5858 llvm::PHINode *ElementPHI = 5859 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5860 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5861 Begin = Address(ElementPHI, Begin.getAlignment()); 5862 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5863 Base.getTBAAInfo()); 5864 // deps[i].flags = NewDepKind; 5865 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5866 LValue FlagsLVal = CGF.EmitLValueForField( 5867 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5868 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5869 FlagsLVal); 5870 5871 // Shift the address forward by one element. 
5872 Address ElementNext = 5873 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5874 ElementPHI->addIncoming(ElementNext.getPointer(), 5875 CGF.Builder.GetInsertBlock()); 5876 llvm::Value *IsEmpty = 5877 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5878 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5879 // Done. 5880 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5881 } 5882 5883 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5884 const OMPExecutableDirective &D, 5885 llvm::Function *TaskFunction, 5886 QualType SharedsTy, Address Shareds, 5887 const Expr *IfCond, 5888 const OMPTaskDataTy &Data) { 5889 if (!CGF.HaveInsertPoint()) 5890 return; 5891 5892 TaskResultTy Result = 5893 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5894 llvm::Value *NewTask = Result.NewTask; 5895 llvm::Function *TaskEntry = Result.TaskEntry; 5896 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5897 LValue TDBase = Result.TDBase; 5898 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5899 // Process list of dependences. 5900 Address DependenciesArray = Address::invalid(); 5901 llvm::Value *NumOfElements; 5902 std::tie(NumOfElements, DependenciesArray) = 5903 emitDependClause(CGF, Data.Dependences, Loc); 5904 5905 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5906 // libcall. 
5907 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5908 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5909 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5910 // list is not empty 5911 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5912 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5913 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5914 llvm::Value *DepTaskArgs[7]; 5915 if (!Data.Dependences.empty()) { 5916 DepTaskArgs[0] = UpLoc; 5917 DepTaskArgs[1] = ThreadID; 5918 DepTaskArgs[2] = NewTask; 5919 DepTaskArgs[3] = NumOfElements; 5920 DepTaskArgs[4] = DependenciesArray.getPointer(); 5921 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5922 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5923 } 5924 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5925 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5926 if (!Data.Tied) { 5927 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5928 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5929 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5930 } 5931 if (!Data.Dependences.empty()) { 5932 CGF.EmitRuntimeCall( 5933 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5934 } else { 5935 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5936 TaskArgs); 5937 } 5938 // Check if parent region is untied and build return for untied task; 5939 if (auto *Region = 5940 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5941 Region->emitUntiedSwitch(CGF); 5942 }; 5943 5944 llvm::Value *DepWaitTaskArgs[6]; 5945 if (!Data.Dependences.empty()) { 5946 DepWaitTaskArgs[0] = UpLoc; 5947 DepWaitTaskArgs[1] = ThreadID; 5948 DepWaitTaskArgs[2] = NumOfElements; 5949 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5950 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5951 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5952 } 5953 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5954 &Data, &DepWaitTaskArgs, 5955 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5956 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5957 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5958 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5959 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5960 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5961 // is specified. 5962 if (!Data.Dependences.empty()) 5963 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5964 DepWaitTaskArgs); 5965 // Call proxy_task_entry(gtid, new_task); 5966 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5967 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5968 Action.Enter(CGF); 5969 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5970 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5971 OutlinedFnArgs); 5972 }; 5973 5974 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5975 // kmp_task_t *new_task); 5976 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5977 // kmp_task_t *new_task); 5978 RegionCodeGenTy RCG(CodeGen); 5979 CommonActionTy Action( 5980 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5981 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5982 RCG.setAction(Action); 5983 RCG(CGF); 5984 }; 5985 5986 if (IfCond) { 5987 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5988 } else { 5989 RegionCodeGenTy ThenRCG(ThenCodeGen); 5990 ThenRCG(CGF); 5991 } 5992 } 5993 5994 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5995 const OMPLoopDirective &D, 5996 llvm::Function *TaskFunction, 5997 QualType SharedsTy, Address Shareds, 5998 const Expr *IfCond, 5999 const OMPTaskDataTy &Data) { 6000 if 
(!CGF.HaveInsertPoint()) 6001 return; 6002 TaskResultTy Result = 6003 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 6004 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 6005 // libcall. 6006 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 6007 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 6008 // sched, kmp_uint64 grainsize, void *task_dup); 6009 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6010 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6011 llvm::Value *IfVal; 6012 if (IfCond) { 6013 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 6014 /*isSigned=*/true); 6015 } else { 6016 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 6017 } 6018 6019 LValue LBLVal = CGF.EmitLValueForField( 6020 Result.TDBase, 6021 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 6022 const auto *LBVar = 6023 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 6024 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 6025 LBLVal.getQuals(), 6026 /*IsInitializer=*/true); 6027 LValue UBLVal = CGF.EmitLValueForField( 6028 Result.TDBase, 6029 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 6030 const auto *UBVar = 6031 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 6032 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 6033 UBLVal.getQuals(), 6034 /*IsInitializer=*/true); 6035 LValue StLVal = CGF.EmitLValueForField( 6036 Result.TDBase, 6037 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 6038 const auto *StVar = 6039 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 6040 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 6041 StLVal.getQuals(), 6042 /*IsInitializer=*/true); 6043 // Store reductions address. 
6044 LValue RedLVal = CGF.EmitLValueForField( 6045 Result.TDBase, 6046 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 6047 if (Data.Reductions) { 6048 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 6049 } else { 6050 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 6051 CGF.getContext().VoidPtrTy); 6052 } 6053 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 6054 llvm::Value *TaskArgs[] = { 6055 UpLoc, 6056 ThreadID, 6057 Result.NewTask, 6058 IfVal, 6059 LBLVal.getPointer(CGF), 6060 UBLVal.getPointer(CGF), 6061 CGF.EmitLoadOfScalar(StLVal, Loc), 6062 llvm::ConstantInt::getSigned( 6063 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 6064 llvm::ConstantInt::getSigned( 6065 CGF.IntTy, Data.Schedule.getPointer() 6066 ? Data.Schedule.getInt() ? NumTasks : Grainsize 6067 : NoSchedule), 6068 Data.Schedule.getPointer() 6069 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 6070 /*isSigned=*/false) 6071 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 6072 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6073 Result.TaskDupFn, CGF.VoidPtrTy) 6074 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 6075 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 6076 } 6077 6078 /// Emit reduction operation for each element of array (required for 6079 /// array sections) LHS op = RHS. 6080 /// \param Type Type of array. 6081 /// \param LHSVar Variable on the left side of the reduction operation 6082 /// (references element of array in original variable). 6083 /// \param RHSVar Variable on the right side of the reduction operation 6084 /// (references element of array in original variable). 6085 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 6086 /// RHSVar. 
6087 static void EmitOMPAggregateReduction( 6088 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 6089 const VarDecl *RHSVar, 6090 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 6091 const Expr *, const Expr *)> &RedOpGen, 6092 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 6093 const Expr *UpExpr = nullptr) { 6094 // Perform element-by-element initialization. 6095 QualType ElementTy; 6096 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 6097 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 6098 6099 // Drill down to the base element type on both arrays. 6100 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 6101 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 6102 6103 llvm::Value *RHSBegin = RHSAddr.getPointer(); 6104 llvm::Value *LHSBegin = LHSAddr.getPointer(); 6105 // Cast from pointer to array type to pointer to single element. 6106 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 6107 // The basic structure here is a while-do loop. 6108 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 6109 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 6110 llvm::Value *IsEmpty = 6111 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 6112 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 6113 6114 // Enter the loop body, making that address the current address. 
6115 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 6116 CGF.EmitBlock(BodyBB); 6117 6118 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 6119 6120 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 6121 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 6122 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 6123 Address RHSElementCurrent = 6124 Address(RHSElementPHI, 6125 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 6126 6127 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 6128 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 6129 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 6130 Address LHSElementCurrent = 6131 Address(LHSElementPHI, 6132 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 6133 6134 // Emit copy. 6135 CodeGenFunction::OMPPrivateScope Scope(CGF); 6136 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 6137 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 6138 Scope.Privatize(); 6139 RedOpGen(CGF, XExpr, EExpr, UpExpr); 6140 Scope.ForceCleanup(); 6141 6142 // Shift the address forward by one element. 6143 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 6144 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 6145 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 6146 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 6147 // Check whether we've reached the end. 6148 llvm::Value *Done = 6149 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 6150 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 6151 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 6152 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 6153 6154 // Done. 6155 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 6156 } 6157 6158 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 6159 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 6160 /// UDR combiner function. 6161 static void emitReductionCombiner(CodeGenFunction &CGF, 6162 const Expr *ReductionOp) { 6163 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 6164 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 6165 if (const auto *DRE = 6166 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 6167 if (const auto *DRD = 6168 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 6169 std::pair<llvm::Function *, llvm::Function *> Reduction = 6170 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 6171 RValue Func = RValue::get(Reduction.first); 6172 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 6173 CGF.EmitIgnoredExpr(ReductionOp); 6174 return; 6175 } 6176 CGF.EmitIgnoredExpr(ReductionOp); 6177 } 6178 6179 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 6180 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 6181 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 6182 ArrayRef<const Expr *> ReductionOps) { 6183 ASTContext &C = CGM.getContext(); 6184 6185 // void reduction_func(void *LHSArg, void *RHSArg); 6186 FunctionArgList Args; 6187 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6188 ImplicitParamDecl::Other); 6189 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6190 ImplicitParamDecl::Other); 6191 Args.push_back(&LHSArg); 6192 Args.push_back(&RHSArg); 6193 const auto &CGFI = 6194 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6195 std::string Name = getName({"omp", "reduction", "reduction_func"}); 6196 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 6197 llvm::GlobalValue::InternalLinkage, Name, 6198 &CGM.getModule()); 6199 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 6200 Fn->setDoesNotRecurse(); 
6201 CodeGenFunction CGF(CGM); 6202 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 6203 6204 // Dst = (void*[n])(LHSArg); 6205 // Src = (void*[n])(RHSArg); 6206 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6207 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 6208 ArgsType), CGF.getPointerAlign()); 6209 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6210 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 6211 ArgsType), CGF.getPointerAlign()); 6212 6213 // ... 6214 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 6215 // ... 6216 CodeGenFunction::OMPPrivateScope Scope(CGF); 6217 auto IPriv = Privates.begin(); 6218 unsigned Idx = 0; 6219 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 6220 const auto *RHSVar = 6221 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 6222 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 6223 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 6224 }); 6225 const auto *LHSVar = 6226 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 6227 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 6228 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 6229 }); 6230 QualType PrivTy = (*IPriv)->getType(); 6231 if (PrivTy->isVariablyModifiedType()) { 6232 // Get array size and emit VLA type. 
6233 ++Idx; 6234 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 6235 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 6236 const VariableArrayType *VLA = 6237 CGF.getContext().getAsVariableArrayType(PrivTy); 6238 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 6239 CodeGenFunction::OpaqueValueMapping OpaqueMap( 6240 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 6241 CGF.EmitVariablyModifiedType(PrivTy); 6242 } 6243 } 6244 Scope.Privatize(); 6245 IPriv = Privates.begin(); 6246 auto ILHS = LHSExprs.begin(); 6247 auto IRHS = RHSExprs.begin(); 6248 for (const Expr *E : ReductionOps) { 6249 if ((*IPriv)->getType()->isArrayType()) { 6250 // Emit reduction for array section. 6251 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 6252 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 6253 EmitOMPAggregateReduction( 6254 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 6255 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 6256 emitReductionCombiner(CGF, E); 6257 }); 6258 } else { 6259 // Emit reduction for array subscript or single variable. 6260 emitReductionCombiner(CGF, E); 6261 } 6262 ++IPriv; 6263 ++ILHS; 6264 ++IRHS; 6265 } 6266 Scope.ForceCleanup(); 6267 CGF.FinishFunction(); 6268 return Fn; 6269 } 6270 6271 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 6272 const Expr *ReductionOp, 6273 const Expr *PrivateRef, 6274 const DeclRefExpr *LHS, 6275 const DeclRefExpr *RHS) { 6276 if (PrivateRef->getType()->isArrayType()) { 6277 // Emit reduction for array section. 
6278 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 6279 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 6280 EmitOMPAggregateReduction( 6281 CGF, PrivateRef->getType(), LHSVar, RHSVar, 6282 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 6283 emitReductionCombiner(CGF, ReductionOp); 6284 }); 6285 } else { 6286 // Emit reduction for array subscript or single variable. 6287 emitReductionCombiner(CGF, ReductionOp); 6288 } 6289 } 6290 6291 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 6292 ArrayRef<const Expr *> Privates, 6293 ArrayRef<const Expr *> LHSExprs, 6294 ArrayRef<const Expr *> RHSExprs, 6295 ArrayRef<const Expr *> ReductionOps, 6296 ReductionOptionsTy Options) { 6297 if (!CGF.HaveInsertPoint()) 6298 return; 6299 6300 bool WithNowait = Options.WithNowait; 6301 bool SimpleReduction = Options.SimpleReduction; 6302 6303 // Next code should be emitted for reduction: 6304 // 6305 // static kmp_critical_name lock = { 0 }; 6306 // 6307 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 6308 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 6309 // ... 6310 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 6311 // *(Type<n>-1*)rhs[<n>-1]); 6312 // } 6313 // 6314 // ... 6315 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 6316 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 6317 // RedList, reduce_func, &<lock>)) { 6318 // case 1: 6319 // ... 6320 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 6321 // ... 6322 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 6323 // break; 6324 // case 2: 6325 // ... 6326 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 6327 // ... 6328 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 6329 // break; 6330 // default:; 6331 // } 6332 // 6333 // if SimpleReduction is true, only the next code is generated: 6334 // ... 
6335 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 6336 // ... 6337 6338 ASTContext &C = CGM.getContext(); 6339 6340 if (SimpleReduction) { 6341 CodeGenFunction::RunCleanupsScope Scope(CGF); 6342 auto IPriv = Privates.begin(); 6343 auto ILHS = LHSExprs.begin(); 6344 auto IRHS = RHSExprs.begin(); 6345 for (const Expr *E : ReductionOps) { 6346 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 6347 cast<DeclRefExpr>(*IRHS)); 6348 ++IPriv; 6349 ++ILHS; 6350 ++IRHS; 6351 } 6352 return; 6353 } 6354 6355 // 1. Build a list of reduction variables. 6356 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 6357 auto Size = RHSExprs.size(); 6358 for (const Expr *E : Privates) { 6359 if (E->getType()->isVariablyModifiedType()) 6360 // Reserve place for array size. 6361 ++Size; 6362 } 6363 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 6364 QualType ReductionArrayTy = 6365 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 6366 /*IndexTypeQuals=*/0); 6367 Address ReductionList = 6368 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 6369 auto IPriv = Privates.begin(); 6370 unsigned Idx = 0; 6371 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 6372 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 6373 CGF.Builder.CreateStore( 6374 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6375 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 6376 Elem); 6377 if ((*IPriv)->getType()->isVariablyModifiedType()) { 6378 // Store array size. 6379 ++Idx; 6380 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 6381 llvm::Value *Size = CGF.Builder.CreateIntCast( 6382 CGF.getVLASize( 6383 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 6384 .NumElts, 6385 CGF.SizeTy, /*isSigned=*/false); 6386 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 6387 Elem); 6388 } 6389 } 6390 6391 // 2. 
Emit reduce_func(). 6392 llvm::Function *ReductionFn = emitReductionFunction( 6393 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 6394 LHSExprs, RHSExprs, ReductionOps); 6395 6396 // 3. Create static kmp_critical_name lock = { 0 }; 6397 std::string Name = getName({"reduction"}); 6398 llvm::Value *Lock = getCriticalRegionLock(Name); 6399 6400 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 6401 // RedList, reduce_func, &<lock>); 6402 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 6403 llvm::Value *ThreadId = getThreadID(CGF, Loc); 6404 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 6405 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6406 ReductionList.getPointer(), CGF.VoidPtrTy); 6407 llvm::Value *Args[] = { 6408 IdentTLoc, // ident_t *<loc> 6409 ThreadId, // i32 <gtid> 6410 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 6411 ReductionArrayTySize, // size_type sizeof(RedList) 6412 RL, // void *RedList 6413 ReductionFn, // void (*) (void *, void *) <reduce_func> 6414 Lock // kmp_critical_name *&<lock> 6415 }; 6416 llvm::Value *Res = CGF.EmitRuntimeCall( 6417 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 6418 : OMPRTL__kmpc_reduce), 6419 Args); 6420 6421 // 5. Build switch(res) 6422 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 6423 llvm::SwitchInst *SwInst = 6424 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 6425 6426 // 6. Build case 1: 6427 // ... 6428 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 6429 // ... 
6430 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 6431 // break; 6432 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 6433 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 6434 CGF.EmitBlock(Case1BB); 6435 6436 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 6437 llvm::Value *EndArgs[] = { 6438 IdentTLoc, // ident_t *<loc> 6439 ThreadId, // i32 <gtid> 6440 Lock // kmp_critical_name *&<lock> 6441 }; 6442 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 6443 CodeGenFunction &CGF, PrePostActionTy &Action) { 6444 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6445 auto IPriv = Privates.begin(); 6446 auto ILHS = LHSExprs.begin(); 6447 auto IRHS = RHSExprs.begin(); 6448 for (const Expr *E : ReductionOps) { 6449 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 6450 cast<DeclRefExpr>(*IRHS)); 6451 ++IPriv; 6452 ++ILHS; 6453 ++IRHS; 6454 } 6455 }; 6456 RegionCodeGenTy RCG(CodeGen); 6457 CommonActionTy Action( 6458 nullptr, llvm::None, 6459 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 6460 : OMPRTL__kmpc_end_reduce), 6461 EndArgs); 6462 RCG.setAction(Action); 6463 RCG(CGF); 6464 6465 CGF.EmitBranch(DefaultBB); 6466 6467 // 7. Build case 2: 6468 // ... 6469 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 6470 // ... 
6471 // break; 6472 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 6473 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 6474 CGF.EmitBlock(Case2BB); 6475 6476 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 6477 CodeGenFunction &CGF, PrePostActionTy &Action) { 6478 auto ILHS = LHSExprs.begin(); 6479 auto IRHS = RHSExprs.begin(); 6480 auto IPriv = Privates.begin(); 6481 for (const Expr *E : ReductionOps) { 6482 const Expr *XExpr = nullptr; 6483 const Expr *EExpr = nullptr; 6484 const Expr *UpExpr = nullptr; 6485 BinaryOperatorKind BO = BO_Comma; 6486 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 6487 if (BO->getOpcode() == BO_Assign) { 6488 XExpr = BO->getLHS(); 6489 UpExpr = BO->getRHS(); 6490 } 6491 } 6492 // Try to emit update expression as a simple atomic. 6493 const Expr *RHSExpr = UpExpr; 6494 if (RHSExpr) { 6495 // Analyze RHS part of the whole expression. 6496 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 6497 RHSExpr->IgnoreParenImpCasts())) { 6498 // If this is a conditional operator, analyze its condition for 6499 // min/max reduction operator. 
6500 RHSExpr = ACO->getCond(); 6501 } 6502 if (const auto *BORHS = 6503 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 6504 EExpr = BORHS->getRHS(); 6505 BO = BORHS->getOpcode(); 6506 } 6507 } 6508 if (XExpr) { 6509 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 6510 auto &&AtomicRedGen = [BO, VD, 6511 Loc](CodeGenFunction &CGF, const Expr *XExpr, 6512 const Expr *EExpr, const Expr *UpExpr) { 6513 LValue X = CGF.EmitLValue(XExpr); 6514 RValue E; 6515 if (EExpr) 6516 E = CGF.EmitAnyExpr(EExpr); 6517 CGF.EmitOMPAtomicSimpleUpdateExpr( 6518 X, E, BO, /*IsXLHSInRHSPart=*/true, 6519 llvm::AtomicOrdering::Monotonic, Loc, 6520 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 6521 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6522 PrivateScope.addPrivate( 6523 VD, [&CGF, VD, XRValue, Loc]() { 6524 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 6525 CGF.emitOMPSimpleStore( 6526 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 6527 VD->getType().getNonReferenceType(), Loc); 6528 return LHSTemp; 6529 }); 6530 (void)PrivateScope.Privatize(); 6531 return CGF.EmitAnyExpr(UpExpr); 6532 }); 6533 }; 6534 if ((*IPriv)->getType()->isArrayType()) { 6535 // Emit atomic reduction for array section. 6536 const auto *RHSVar = 6537 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 6538 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 6539 AtomicRedGen, XExpr, EExpr, UpExpr); 6540 } else { 6541 // Emit atomic reduction for array subscript or single variable. 6542 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 6543 } 6544 } else { 6545 // Emit as a critical region. 
6546 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 6547 const Expr *, const Expr *) { 6548 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6549 std::string Name = RT.getName({"atomic_reduction"}); 6550 RT.emitCriticalRegion( 6551 CGF, Name, 6552 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 6553 Action.Enter(CGF); 6554 emitReductionCombiner(CGF, E); 6555 }, 6556 Loc); 6557 }; 6558 if ((*IPriv)->getType()->isArrayType()) { 6559 const auto *LHSVar = 6560 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 6561 const auto *RHSVar = 6562 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 6563 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 6564 CritRedGen); 6565 } else { 6566 CritRedGen(CGF, nullptr, nullptr, nullptr); 6567 } 6568 } 6569 ++ILHS; 6570 ++IRHS; 6571 ++IPriv; 6572 } 6573 }; 6574 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 6575 if (!WithNowait) { 6576 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 6577 llvm::Value *EndArgs[] = { 6578 IdentTLoc, // ident_t *<loc> 6579 ThreadId, // i32 <gtid> 6580 Lock // kmp_critical_name *&<lock> 6581 }; 6582 CommonActionTy Action(nullptr, llvm::None, 6583 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 6584 EndArgs); 6585 AtomicRCG.setAction(Action); 6586 AtomicRCG(CGF); 6587 } else { 6588 AtomicRCG(CGF); 6589 } 6590 6591 CGF.EmitBranch(DefaultBB); 6592 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 6593 } 6594 6595 /// Generates unique name for artificial threadprivate variables. 6596 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 6597 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 6598 const Expr *Ref) { 6599 SmallString<256> Buffer; 6600 llvm::raw_svector_ostream Out(Buffer); 6601 const clang::DeclRefExpr *DE; 6602 const VarDecl *D = ::getBaseDecl(Ref, DE); 6603 if (!D) 6604 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 6605 D = D->getCanonicalDecl(); 6606 std::string Name = CGM.getOpenMPRuntime().getName( 6607 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 6608 Out << Prefix << Name << "_" 6609 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 6610 return std::string(Out.str()); 6611 } 6612 6613 /// Emits reduction initializer function: 6614 /// \code 6615 /// void @.red_init(void* %arg, void* %orig) { 6616 /// %0 = bitcast void* %arg to <type>* 6617 /// store <type> <init>, <type>* %0 6618 /// ret void 6619 /// } 6620 /// \endcode 6621 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6622 SourceLocation Loc, 6623 ReductionCodeGen &RCG, unsigned N) { 6624 ASTContext &C = CGM.getContext(); 6625 QualType VoidPtrTy = C.VoidPtrTy; 6626 VoidPtrTy.addRestrict(); 6627 FunctionArgList Args; 6628 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 6629 ImplicitParamDecl::Other); 6630 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 6631 ImplicitParamDecl::Other); 6632 Args.emplace_back(&Param); 6633 Args.emplace_back(&ParamOrig); 6634 const auto &FnInfo = 6635 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6636 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6637 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6638 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6639 Name, &CGM.getModule()); 6640 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6641 Fn->setDoesNotRecurse(); 6642 CodeGenFunction CGF(CGM); 6643 
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6644 Address PrivateAddr = CGF.EmitLoadOfPointer( 6645 CGF.GetAddrOfLocalVar(&Param), 6646 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6647 llvm::Value *Size = nullptr; 6648 // If the size of the reduction item is non-constant, load it from global 6649 // threadprivate variable. 6650 if (RCG.getSizes(N).second) { 6651 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6652 CGF, CGM.getContext().getSizeType(), 6653 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6654 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6655 CGM.getContext().getSizeType(), Loc); 6656 } 6657 RCG.emitAggregateType(CGF, N, Size); 6658 LValue OrigLVal; 6659 // If initializer uses initializer from declare reduction construct, emit a 6660 // pointer to the address of the original reduction item (reuired by reduction 6661 // initializer) 6662 if (RCG.usesReductionInitializer(N)) { 6663 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); 6664 SharedAddr = CGF.EmitLoadOfPointer( 6665 SharedAddr, 6666 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 6667 OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 6668 } else { 6669 OrigLVal = CGF.MakeNaturalAlignAddrLValue( 6670 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 6671 CGM.getContext().VoidPtrTy); 6672 } 6673 // Emit the initializer: 6674 // %0 = bitcast void* %arg to <type>* 6675 // store <type> <init>, <type>* %0 6676 RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, 6677 [](CodeGenFunction &) { return false; }); 6678 CGF.FinishFunction(); 6679 return Fn; 6680 } 6681 6682 /// Emits reduction combiner function: 6683 /// \code 6684 /// void @.red_comb(void* %arg0, void* %arg1) { 6685 /// %lhs = bitcast void* %arg0 to <type>* 6686 /// %rhs = bitcast void* %arg1 to <type>* 6687 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6688 /// store <type> %2, <type>* %lhs 6689 /// 
ret void 6690 /// } 6691 /// \endcode 6692 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 6693 SourceLocation Loc, 6694 ReductionCodeGen &RCG, unsigned N, 6695 const Expr *ReductionOp, 6696 const Expr *LHS, const Expr *RHS, 6697 const Expr *PrivateRef) { 6698 ASTContext &C = CGM.getContext(); 6699 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6700 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6701 FunctionArgList Args; 6702 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6703 C.VoidPtrTy, ImplicitParamDecl::Other); 6704 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6705 ImplicitParamDecl::Other); 6706 Args.emplace_back(&ParamInOut); 6707 Args.emplace_back(&ParamIn); 6708 const auto &FnInfo = 6709 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6710 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6711 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6712 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6713 Name, &CGM.getModule()); 6714 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6715 Fn->setDoesNotRecurse(); 6716 CodeGenFunction CGF(CGM); 6717 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6718 llvm::Value *Size = nullptr; 6719 // If the size of the reduction item is non-constant, load it from global 6720 // threadprivate variable. 6721 if (RCG.getSizes(N).second) { 6722 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6723 CGF, CGM.getContext().getSizeType(), 6724 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6725 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6726 CGM.getContext().getSizeType(), Loc); 6727 } 6728 RCG.emitAggregateType(CGF, N, Size); 6729 // Remap lhs and rhs variables to the addresses of the function arguments. 
6730 // %lhs = bitcast void* %arg0 to <type>* 6731 // %rhs = bitcast void* %arg1 to <type>* 6732 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6733 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6734 // Pull out the pointer to the variable. 6735 Address PtrAddr = CGF.EmitLoadOfPointer( 6736 CGF.GetAddrOfLocalVar(&ParamInOut), 6737 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6738 return CGF.Builder.CreateElementBitCast( 6739 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6740 }); 6741 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6742 // Pull out the pointer to the variable. 6743 Address PtrAddr = CGF.EmitLoadOfPointer( 6744 CGF.GetAddrOfLocalVar(&ParamIn), 6745 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6746 return CGF.Builder.CreateElementBitCast( 6747 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6748 }); 6749 PrivateScope.Privatize(); 6750 // Emit the combiner body: 6751 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6752 // store <type> %2, <type>* %lhs 6753 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6754 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6755 cast<DeclRefExpr>(RHS)); 6756 CGF.FinishFunction(); 6757 return Fn; 6758 } 6759 6760 /// Emits reduction finalizer function: 6761 /// \code 6762 /// void @.red_fini(void* %arg) { 6763 /// %0 = bitcast void* %arg to <type>* 6764 /// <destroy>(<type>* %0) 6765 /// ret void 6766 /// } 6767 /// \endcode 6768 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6769 SourceLocation Loc, 6770 ReductionCodeGen &RCG, unsigned N) { 6771 if (!RCG.needCleanups(N)) 6772 return nullptr; 6773 ASTContext &C = CGM.getContext(); 6774 FunctionArgList Args; 6775 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6776 ImplicitParamDecl::Other); 6777 Args.emplace_back(&Param); 6778 const auto &FnInfo = 6779 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6780 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 6781 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6782 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6783 Name, &CGM.getModule()); 6784 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6785 Fn->setDoesNotRecurse(); 6786 CodeGenFunction CGF(CGM); 6787 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6788 Address PrivateAddr = CGF.EmitLoadOfPointer( 6789 CGF.GetAddrOfLocalVar(&Param), 6790 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6791 llvm::Value *Size = nullptr; 6792 // If the size of the reduction item is non-constant, load it from global 6793 // threadprivate variable. 6794 if (RCG.getSizes(N).second) { 6795 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6796 CGF, CGM.getContext().getSizeType(), 6797 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6798 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6799 CGM.getContext().getSizeType(), Loc); 6800 } 6801 RCG.emitAggregateType(CGF, N, Size); 6802 // Emit the finalizer body: 6803 // <destroy>(<type>* %0) 6804 RCG.emitCleanups(CGF, N, PrivateAddr); 6805 CGF.FinishFunction(Loc); 6806 return Fn; 6807 } 6808 6809 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6810 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6811 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6812 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6813 return nullptr; 6814 6815 // Build typedef struct: 6816 // kmp_taskred_input { 6817 // void *reduce_shar; // shared reduction item 6818 // void *reduce_orig; // original reduction item used for initialization 6819 // size_t reduce_size; // size of data item 6820 // void *reduce_init; // data initialization routine 6821 // void *reduce_fini; // data finalization routine 6822 // void *reduce_comb; // data combiner routine 6823 // kmp_task_red_flags_t flags; // 
flags for additional info from compiler 6824 // } kmp_taskred_input_t; 6825 ASTContext &C = CGM.getContext(); 6826 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6827 RD->startDefinition(); 6828 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6829 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6830 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6831 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6832 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6833 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6834 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6835 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6836 RD->completeDefinition(); 6837 QualType RDType = C.getRecordType(RD); 6838 unsigned Size = Data.ReductionVars.size(); 6839 llvm::APInt ArraySize(/*numBits=*/64, Size); 6840 QualType ArrayRDType = C.getConstantArrayType( 6841 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6842 // kmp_task_red_input_t .rd_input.[Size]; 6843 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6844 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6845 Data.ReductionCopies, Data.ReductionOps); 6846 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6847 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6848 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6849 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6850 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6851 TaskRedInput.getPointer(), Idxs, 6852 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6853 ".rd_input.gep."); 6854 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6855 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6856 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6857 RCG.emitSharedOrigLValue(CGF, Cnt); 6858 llvm::Value *CastedShared = 6859 
CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6860 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6861 // ElemLVal.reduce_orig = &Origs[Cnt]; 6862 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6863 llvm::Value *CastedOrig = 6864 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); 6865 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); 6866 RCG.emitAggregateType(CGF, Cnt); 6867 llvm::Value *SizeValInChars; 6868 llvm::Value *SizeVal; 6869 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6870 // We use delayed creation/initialization for VLAs and array sections. It is 6871 // required because runtime does not provide the way to pass the sizes of 6872 // VLAs/array sections to initializer/combiner/finalizer functions. Instead 6873 // threadprivate global variables are used to store these values and use 6874 // them in the functions. 6875 bool DelayedCreation = !!SizeVal; 6876 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6877 /*isSigned=*/false); 6878 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6879 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6880 // ElemLVal.reduce_init = init; 6881 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6882 llvm::Value *InitAddr = 6883 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6884 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6885 // ElemLVal.reduce_fini = fini; 6886 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6887 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6888 llvm::Value *FiniAddr = Fini 6889 ? 
CGF.EmitCastToVoidPtr(Fini) 6890 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6891 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6892 // ElemLVal.reduce_comb = comb; 6893 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6894 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6895 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6896 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6897 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6898 // ElemLVal.flags = 0; 6899 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6900 if (DelayedCreation) { 6901 CGF.EmitStoreOfScalar( 6902 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6903 FlagsLVal); 6904 } else 6905 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6906 FlagsLVal.getType()); 6907 } 6908 if (Data.IsReductionWithTaskMod) { 6909 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6910 // is_ws, int num, void *data); 6911 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6912 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6913 CGM.IntTy, /*isSigned=*/true); 6914 llvm::Value *Args[] = { 6915 IdentTLoc, GTid, 6916 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 
1 : 0, 6917 /*isSigned=*/true), 6918 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6919 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6920 TaskRedInput.getPointer(), CGM.VoidPtrTy)}; 6921 return CGF.EmitRuntimeCall( 6922 createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args); 6923 } 6924 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); 6925 llvm::Value *Args[] = { 6926 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6927 /*isSigned=*/true), 6928 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6929 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6930 CGM.VoidPtrTy)}; 6931 return CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskred_init), 6932 Args); 6933 } 6934 6935 void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 6936 SourceLocation Loc, 6937 bool IsWorksharingReduction) { 6938 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 6939 // is_ws, int num, void *data); 6940 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); 6941 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6942 CGM.IntTy, /*isSigned=*/true); 6943 llvm::Value *Args[] = {IdentTLoc, GTid, 6944 llvm::ConstantInt::get(CGM.IntTy, 6945 IsWorksharingReduction ? 1 : 0, 6946 /*isSigned=*/true)}; 6947 (void)CGF.EmitRuntimeCall( 6948 createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args); 6949 } 6950 6951 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6952 SourceLocation Loc, 6953 ReductionCodeGen &RCG, 6954 unsigned N) { 6955 auto Sizes = RCG.getSizes(N); 6956 // Emit threadprivate global variable if the type is non-constant 6957 // (Sizes.second = nullptr). 
6958 if (Sizes.second) { 6959 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6960 /*isSigned=*/false); 6961 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6962 CGF, CGM.getContext().getSizeType(), 6963 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6964 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6965 } 6966 } 6967 6968 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6969 SourceLocation Loc, 6970 llvm::Value *ReductionsPtr, 6971 LValue SharedLVal) { 6972 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6973 // *d); 6974 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6975 CGM.IntTy, 6976 /*isSigned=*/true), 6977 ReductionsPtr, 6978 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6979 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6980 return Address( 6981 CGF.EmitRuntimeCall( 6982 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6983 SharedLVal.getAlignment()); 6984 } 6985 6986 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6987 SourceLocation Loc) { 6988 if (!CGF.HaveInsertPoint()) 6989 return; 6990 6991 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 6992 if (OMPBuilder) { 6993 OMPBuilder->CreateTaskwait(CGF.Builder); 6994 } else { 6995 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6996 // global_tid); 6997 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6998 // Ignore return result until untied tasks are supported. 
6999 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 7000 } 7001 7002 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 7003 Region->emitUntiedSwitch(CGF); 7004 } 7005 7006 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 7007 OpenMPDirectiveKind InnerKind, 7008 const RegionCodeGenTy &CodeGen, 7009 bool HasCancel) { 7010 if (!CGF.HaveInsertPoint()) 7011 return; 7012 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 7013 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 7014 } 7015 7016 namespace { 7017 enum RTCancelKind { 7018 CancelNoreq = 0, 7019 CancelParallel = 1, 7020 CancelLoop = 2, 7021 CancelSections = 3, 7022 CancelTaskgroup = 4 7023 }; 7024 } // anonymous namespace 7025 7026 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 7027 RTCancelKind CancelKind = CancelNoreq; 7028 if (CancelRegion == OMPD_parallel) 7029 CancelKind = CancelParallel; 7030 else if (CancelRegion == OMPD_for) 7031 CancelKind = CancelLoop; 7032 else if (CancelRegion == OMPD_sections) 7033 CancelKind = CancelSections; 7034 else { 7035 assert(CancelRegion == OMPD_taskgroup); 7036 CancelKind = CancelTaskgroup; 7037 } 7038 return CancelKind; 7039 } 7040 7041 void CGOpenMPRuntime::emitCancellationPointCall( 7042 CodeGenFunction &CGF, SourceLocation Loc, 7043 OpenMPDirectiveKind CancelRegion) { 7044 if (!CGF.HaveInsertPoint()) 7045 return; 7046 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 7047 // global_tid, kmp_int32 cncl_kind); 7048 if (auto *OMPRegionInfo = 7049 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 7050 // For 'cancellation point taskgroup', the task region info may not have a 7051 // cancel. This may instead happen in another adjacent task. 
7052 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 7053 llvm::Value *Args[] = { 7054 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 7055 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 7056 // Ignore return result until untied tasks are supported. 7057 llvm::Value *Result = CGF.EmitRuntimeCall( 7058 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 7059 // if (__kmpc_cancellationpoint()) { 7060 // exit from construct; 7061 // } 7062 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 7063 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 7064 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 7065 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 7066 CGF.EmitBlock(ExitBB); 7067 // exit from construct; 7068 CodeGenFunction::JumpDest CancelDest = 7069 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 7070 CGF.EmitBranchThroughCleanup(CancelDest); 7071 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 7072 } 7073 } 7074 } 7075 7076 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 7077 const Expr *IfCond, 7078 OpenMPDirectiveKind CancelRegion) { 7079 if (!CGF.HaveInsertPoint()) 7080 return; 7081 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 7082 // kmp_int32 cncl_kind); 7083 if (auto *OMPRegionInfo = 7084 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 7085 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 7086 PrePostActionTy &) { 7087 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 7088 llvm::Value *Args[] = { 7089 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 7090 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 7091 // Ignore return result until untied tasks are supported. 
7092 llvm::Value *Result = CGF.EmitRuntimeCall( 7093 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 7094 // if (__kmpc_cancel()) { 7095 // exit from construct; 7096 // } 7097 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 7098 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 7099 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 7100 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 7101 CGF.EmitBlock(ExitBB); 7102 // exit from construct; 7103 CodeGenFunction::JumpDest CancelDest = 7104 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 7105 CGF.EmitBranchThroughCleanup(CancelDest); 7106 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 7107 }; 7108 if (IfCond) { 7109 emitIfClause(CGF, IfCond, ThenGen, 7110 [](CodeGenFunction &, PrePostActionTy &) {}); 7111 } else { 7112 RegionCodeGenTy ThenRCG(ThenGen); 7113 ThenRCG(CGF); 7114 } 7115 } 7116 } 7117 7118 namespace { 7119 /// Cleanup action for uses_allocators support. 7120 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 7121 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 7122 7123 public: 7124 OMPUsesAllocatorsActionTy( 7125 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 7126 : Allocators(Allocators) {} 7127 void Enter(CodeGenFunction &CGF) override { 7128 if (!CGF.HaveInsertPoint()) 7129 return; 7130 for (const auto &AllocatorData : Allocators) { 7131 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 7132 CGF, AllocatorData.first, AllocatorData.second); 7133 } 7134 } 7135 void Exit(CodeGenFunction &CGF) override { 7136 if (!CGF.HaveInsertPoint()) 7137 return; 7138 for (const auto &AllocatorData : Allocators) { 7139 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 7140 AllocatorData.first); 7141 } 7142 } 7143 }; 7144 } // namespace 7145 7146 void CGOpenMPRuntime::emitTargetOutlinedFunction( 7147 const OMPExecutableDirective &D, StringRef ParentName, 7148 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 
7149 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 7150 assert(!ParentName.empty() && "Invalid target region parent name!"); 7151 HasEmittedTargetRegion = true; 7152 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 7153 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 7154 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 7155 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 7156 if (!D.AllocatorTraits) 7157 continue; 7158 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 7159 } 7160 } 7161 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 7162 CodeGen.setAction(UsesAllocatorAction); 7163 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 7164 IsOffloadEntry, CodeGen); 7165 } 7166 7167 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 7168 const Expr *Allocator, 7169 const Expr *AllocatorTraits) { 7170 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 7171 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 7172 // Use default memspace handle. 
7173 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 7174 llvm::Value *NumTraits = llvm::ConstantInt::get( 7175 CGF.IntTy, cast<ConstantArrayType>( 7176 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 7177 ->getSize() 7178 .getLimitedValue()); 7179 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 7180 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7181 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 7182 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 7183 AllocatorTraitsLVal.getBaseInfo(), 7184 AllocatorTraitsLVal.getTBAAInfo()); 7185 llvm::Value *Traits = 7186 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 7187 7188 llvm::Value *AllocatorVal = 7189 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_init_allocator), 7190 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 7191 // Store to allocator. 7192 CGF.EmitVarDecl(*cast<VarDecl>( 7193 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); 7194 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 7195 AllocatorVal = 7196 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy, 7197 Allocator->getType(), Allocator->getExprLoc()); 7198 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal); 7199 } 7200 7201 void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, 7202 const Expr *Allocator) { 7203 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 7204 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 7205 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); 7206 llvm::Value *AllocatorVal = 7207 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc()); 7208 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(), 7209 CGF.getContext().VoidPtrTy, 7210 Allocator->getExprLoc()); 7211 (void)CGF.EmitRuntimeCall( 7212 
createRuntimeFunction(OMPRTL__kmpc_destroy_allocator), 7213 {ThreadId, AllocatorVal}); 7214 } 7215 7216 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 7217 const OMPExecutableDirective &D, StringRef ParentName, 7218 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 7219 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 7220 // Create a unique name for the entry function using the source location 7221 // information of the current target region. The name will be something like: 7222 // 7223 // __omp_offloading_DD_FFFF_PP_lBB 7224 // 7225 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 7226 // mangled name of the function that encloses the target region and BB is the 7227 // line number of the target region. 7228 7229 unsigned DeviceID; 7230 unsigned FileID; 7231 unsigned Line; 7232 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 7233 Line); 7234 SmallString<64> EntryFnName; 7235 { 7236 llvm::raw_svector_ostream OS(EntryFnName); 7237 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 7238 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 7239 } 7240 7241 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 7242 7243 CodeGenFunction CGF(CGM, true); 7244 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 7245 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7246 7247 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 7248 7249 // If this target outline function is not an offload entry, we don't need to 7250 // register it. 7251 if (!IsOffloadEntry) 7252 return; 7253 7254 // The target region ID is used by the runtime library to identify the current 7255 // target region, so it only has to be unique and not necessarily point to 7256 // anything. 
It could be the pointer to the outlined function that implements 7257 // the target region, but we aren't using that so that the compiler doesn't 7258 // need to keep that, and could therefore inline the host function if proven 7259 // worthwhile during optimization. In the other hand, if emitting code for the 7260 // device, the ID has to be the function address so that it can retrieved from 7261 // the offloading entry and launched by the runtime library. We also mark the 7262 // outlined function to have external linkage in case we are emitting code for 7263 // the device, because these functions will be entry points to the device. 7264 7265 if (CGM.getLangOpts().OpenMPIsDevice) { 7266 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 7267 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 7268 OutlinedFn->setDSOLocal(false); 7269 } else { 7270 std::string Name = getName({EntryFnName, "region_id"}); 7271 OutlinedFnID = new llvm::GlobalVariable( 7272 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 7273 llvm::GlobalValue::WeakAnyLinkage, 7274 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 7275 } 7276 7277 // Register the information for the entry associated with this target region. 7278 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 7279 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 7280 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 7281 } 7282 7283 /// Checks if the expression is constant or does not have non-trivial function 7284 /// calls. 7285 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 7286 // We can skip constant expressions. 7287 // We can skip expressions with trivial calls or simple expressions. 
7288 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 7289 !E->hasNonTrivialCall(Ctx)) && 7290 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 7291 } 7292 7293 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 7294 const Stmt *Body) { 7295 const Stmt *Child = Body->IgnoreContainers(); 7296 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 7297 Child = nullptr; 7298 for (const Stmt *S : C->body()) { 7299 if (const auto *E = dyn_cast<Expr>(S)) { 7300 if (isTrivial(Ctx, E)) 7301 continue; 7302 } 7303 // Some of the statements can be ignored. 7304 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 7305 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 7306 continue; 7307 // Analyze declarations. 7308 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 7309 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 7310 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 7311 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 7312 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 7313 isa<UsingDirectiveDecl>(D) || 7314 isa<OMPDeclareReductionDecl>(D) || 7315 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 7316 return true; 7317 const auto *VD = dyn_cast<VarDecl>(D); 7318 if (!VD) 7319 return false; 7320 return VD->isConstexpr() || 7321 ((VD->getType().isTrivialType(Ctx) || 7322 VD->getType()->isReferenceType()) && 7323 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 7324 })) 7325 continue; 7326 } 7327 // Found multiple children - cannot get the one child only. 7328 if (Child) 7329 return nullptr; 7330 Child = S; 7331 } 7332 if (Child) 7333 Child = Child->IgnoreContainers(); 7334 } 7335 return Child; 7336 } 7337 7338 /// Emit the number of teams for a target directive. Inspect the num_teams 7339 /// clause associated with a teams construct combined or closely nested 7340 /// with the target directive. 
7341 /// 7342 /// Emit a team of size one for directives such as 'target parallel' that 7343 /// have no associated teams construct. 7344 /// 7345 /// Otherwise, return nullptr. 7346 static llvm::Value * 7347 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 7348 const OMPExecutableDirective &D) { 7349 assert(!CGF.getLangOpts().OpenMPIsDevice && 7350 "Clauses associated with the teams directive expected to be emitted " 7351 "only for the host!"); 7352 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7353 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7354 "Expected target-based executable directive."); 7355 CGBuilderTy &Bld = CGF.Builder; 7356 switch (DirectiveKind) { 7357 case OMPD_target: { 7358 const auto *CS = D.getInnermostCapturedStmt(); 7359 const auto *Body = 7360 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 7361 const Stmt *ChildStmt = 7362 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 7363 if (const auto *NestedDir = 7364 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 7365 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 7366 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 7367 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7368 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7369 const Expr *NumTeams = 7370 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 7371 llvm::Value *NumTeamsVal = 7372 CGF.EmitScalarExpr(NumTeams, 7373 /*IgnoreResultAssign*/ true); 7374 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 7375 /*isSigned=*/true); 7376 } 7377 return Bld.getInt32(0); 7378 } 7379 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 7380 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 7381 return Bld.getInt32(1); 7382 return Bld.getInt32(0); 7383 } 7384 return nullptr; 7385 } 7386 case OMPD_target_teams: 7387 case OMPD_target_teams_distribute: 7388 case OMPD_target_teams_distribute_simd: 7389 case 
OMPD_target_teams_distribute_parallel_for: 7390 case OMPD_target_teams_distribute_parallel_for_simd: { 7391 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 7392 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 7393 const Expr *NumTeams = 7394 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 7395 llvm::Value *NumTeamsVal = 7396 CGF.EmitScalarExpr(NumTeams, 7397 /*IgnoreResultAssign*/ true); 7398 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 7399 /*isSigned=*/true); 7400 } 7401 return Bld.getInt32(0); 7402 } 7403 case OMPD_target_parallel: 7404 case OMPD_target_parallel_for: 7405 case OMPD_target_parallel_for_simd: 7406 case OMPD_target_simd: 7407 return Bld.getInt32(1); 7408 case OMPD_parallel: 7409 case OMPD_for: 7410 case OMPD_parallel_for: 7411 case OMPD_parallel_master: 7412 case OMPD_parallel_sections: 7413 case OMPD_for_simd: 7414 case OMPD_parallel_for_simd: 7415 case OMPD_cancel: 7416 case OMPD_cancellation_point: 7417 case OMPD_ordered: 7418 case OMPD_threadprivate: 7419 case OMPD_allocate: 7420 case OMPD_task: 7421 case OMPD_simd: 7422 case OMPD_sections: 7423 case OMPD_section: 7424 case OMPD_single: 7425 case OMPD_master: 7426 case OMPD_critical: 7427 case OMPD_taskyield: 7428 case OMPD_barrier: 7429 case OMPD_taskwait: 7430 case OMPD_taskgroup: 7431 case OMPD_atomic: 7432 case OMPD_flush: 7433 case OMPD_depobj: 7434 case OMPD_scan: 7435 case OMPD_teams: 7436 case OMPD_target_data: 7437 case OMPD_target_exit_data: 7438 case OMPD_target_enter_data: 7439 case OMPD_distribute: 7440 case OMPD_distribute_simd: 7441 case OMPD_distribute_parallel_for: 7442 case OMPD_distribute_parallel_for_simd: 7443 case OMPD_teams_distribute: 7444 case OMPD_teams_distribute_simd: 7445 case OMPD_teams_distribute_parallel_for: 7446 case OMPD_teams_distribute_parallel_for_simd: 7447 case OMPD_target_update: 7448 case OMPD_declare_simd: 7449 case OMPD_declare_variant: 7450 case OMPD_begin_declare_variant: 7451 case OMPD_end_declare_variant: 7452 case 
OMPD_declare_target: 7453 case OMPD_end_declare_target: 7454 case OMPD_declare_reduction: 7455 case OMPD_declare_mapper: 7456 case OMPD_taskloop: 7457 case OMPD_taskloop_simd: 7458 case OMPD_master_taskloop: 7459 case OMPD_master_taskloop_simd: 7460 case OMPD_parallel_master_taskloop: 7461 case OMPD_parallel_master_taskloop_simd: 7462 case OMPD_requires: 7463 case OMPD_unknown: 7464 break; 7465 } 7466 llvm_unreachable("Unexpected directive kind."); 7467 } 7468 7469 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 7470 llvm::Value *DefaultThreadLimitVal) { 7471 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7472 CGF.getContext(), CS->getCapturedStmt()); 7473 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7474 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 7475 llvm::Value *NumThreads = nullptr; 7476 llvm::Value *CondVal = nullptr; 7477 // Handle if clause. If if clause present, the number of threads is 7478 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
7479 if (Dir->hasClausesOfKind<OMPIfClause>()) { 7480 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7481 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7482 const OMPIfClause *IfClause = nullptr; 7483 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 7484 if (C->getNameModifier() == OMPD_unknown || 7485 C->getNameModifier() == OMPD_parallel) { 7486 IfClause = C; 7487 break; 7488 } 7489 } 7490 if (IfClause) { 7491 const Expr *Cond = IfClause->getCondition(); 7492 bool Result; 7493 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7494 if (!Result) 7495 return CGF.Builder.getInt32(1); 7496 } else { 7497 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 7498 if (const auto *PreInit = 7499 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 7500 for (const auto *I : PreInit->decls()) { 7501 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7502 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7503 } else { 7504 CodeGenFunction::AutoVarEmission Emission = 7505 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7506 CGF.EmitAutoVarCleanups(Emission); 7507 } 7508 } 7509 } 7510 CondVal = CGF.EvaluateExprAsBool(Cond); 7511 } 7512 } 7513 } 7514 // Check the value of num_threads clause iff if clause was not specified 7515 // or is not evaluated to false. 
7516 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 7517 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7518 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7519 const auto *NumThreadsClause = 7520 Dir->getSingleClause<OMPNumThreadsClause>(); 7521 CodeGenFunction::LexicalScope Scope( 7522 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 7523 if (const auto *PreInit = 7524 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 7525 for (const auto *I : PreInit->decls()) { 7526 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7527 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7528 } else { 7529 CodeGenFunction::AutoVarEmission Emission = 7530 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7531 CGF.EmitAutoVarCleanups(Emission); 7532 } 7533 } 7534 } 7535 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 7536 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 7537 /*isSigned=*/false); 7538 if (DefaultThreadLimitVal) 7539 NumThreads = CGF.Builder.CreateSelect( 7540 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 7541 DefaultThreadLimitVal, NumThreads); 7542 } else { 7543 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 7544 : CGF.Builder.getInt32(0); 7545 } 7546 // Process condition of the if clause. 7547 if (CondVal) { 7548 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 7549 CGF.Builder.getInt32(1)); 7550 } 7551 return NumThreads; 7552 } 7553 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 7554 return CGF.Builder.getInt32(1); 7555 return DefaultThreadLimitVal; 7556 } 7557 return DefaultThreadLimitVal ? DefaultThreadLimitVal 7558 : CGF.Builder.getInt32(0); 7559 } 7560 7561 /// Emit the number of threads for a target directive. Inspect the 7562 /// thread_limit clause associated with a teams construct combined or closely 7563 /// nested with the target directive. 
7564 /// 7565 /// Emit the num_threads clause for directives such as 'target parallel' that 7566 /// have no associated teams construct. 7567 /// 7568 /// Otherwise, return nullptr. 7569 static llvm::Value * 7570 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 7571 const OMPExecutableDirective &D) { 7572 assert(!CGF.getLangOpts().OpenMPIsDevice && 7573 "Clauses associated with the teams directive expected to be emitted " 7574 "only for the host!"); 7575 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7576 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7577 "Expected target-based executable directive."); 7578 CGBuilderTy &Bld = CGF.Builder; 7579 llvm::Value *ThreadLimitVal = nullptr; 7580 llvm::Value *NumThreadsVal = nullptr; 7581 switch (DirectiveKind) { 7582 case OMPD_target: { 7583 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7584 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7585 return NumThreads; 7586 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7587 CGF.getContext(), CS->getCapturedStmt()); 7588 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7589 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7590 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7591 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7592 const auto *ThreadLimitClause = 7593 Dir->getSingleClause<OMPThreadLimitClause>(); 7594 CodeGenFunction::LexicalScope Scope( 7595 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7596 if (const auto *PreInit = 7597 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7598 for (const auto *I : PreInit->decls()) { 7599 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7600 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7601 } else { 7602 CodeGenFunction::AutoVarEmission Emission = 7603 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7604 CGF.EmitAutoVarCleanups(Emission); 7605 } 7606 } 7607 } 7608 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7609 
ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7610 ThreadLimitVal = 7611 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7612 } 7613 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7614 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7615 CS = Dir->getInnermostCapturedStmt(); 7616 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7617 CGF.getContext(), CS->getCapturedStmt()); 7618 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7619 } 7620 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7621 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7622 CS = Dir->getInnermostCapturedStmt(); 7623 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7624 return NumThreads; 7625 } 7626 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7627 return Bld.getInt32(1); 7628 } 7629 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7630 } 7631 case OMPD_target_teams: { 7632 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7633 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7634 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7635 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7636 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7637 ThreadLimitVal = 7638 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7639 } 7640 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7641 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7642 return NumThreads; 7643 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7644 CGF.getContext(), CS->getCapturedStmt()); 7645 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7646 if (Dir->getDirectiveKind() == OMPD_distribute) { 7647 CS = Dir->getInnermostCapturedStmt(); 7648 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7649 return NumThreads; 7650 } 7651 } 7652 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 7653 } 7654 case OMPD_target_teams_distribute: 7655 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7656 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7657 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7658 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7659 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7660 ThreadLimitVal = 7661 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7662 } 7663 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 7664 case OMPD_target_parallel: 7665 case OMPD_target_parallel_for: 7666 case OMPD_target_parallel_for_simd: 7667 case OMPD_target_teams_distribute_parallel_for: 7668 case OMPD_target_teams_distribute_parallel_for_simd: { 7669 llvm::Value *CondVal = nullptr; 7670 // Handle if clause. If if clause present, the number of threads is 7671 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 7672 if (D.hasClausesOfKind<OMPIfClause>()) { 7673 const OMPIfClause *IfClause = nullptr; 7674 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7675 if (C->getNameModifier() == OMPD_unknown || 7676 C->getNameModifier() == OMPD_parallel) { 7677 IfClause = C; 7678 break; 7679 } 7680 } 7681 if (IfClause) { 7682 const Expr *Cond = IfClause->getCondition(); 7683 bool Result; 7684 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7685 if (!Result) 7686 return Bld.getInt32(1); 7687 } else { 7688 CodeGenFunction::RunCleanupsScope Scope(CGF); 7689 CondVal = CGF.EvaluateExprAsBool(Cond); 7690 } 7691 } 7692 } 7693 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7694 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7695 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7696 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7697 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7698 ThreadLimitVal = 7699 Bld.CreateIntCast(ThreadLimit, 
CGF.Int32Ty, /*isSigned=*/false); 7700 } 7701 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7702 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7703 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7704 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7705 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7706 NumThreadsVal = 7707 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7708 ThreadLimitVal = ThreadLimitVal 7709 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7710 ThreadLimitVal), 7711 NumThreadsVal, ThreadLimitVal) 7712 : NumThreadsVal; 7713 } 7714 if (!ThreadLimitVal) 7715 ThreadLimitVal = Bld.getInt32(0); 7716 if (CondVal) 7717 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7718 return ThreadLimitVal; 7719 } 7720 case OMPD_target_teams_distribute_simd: 7721 case OMPD_target_simd: 7722 return Bld.getInt32(1); 7723 case OMPD_parallel: 7724 case OMPD_for: 7725 case OMPD_parallel_for: 7726 case OMPD_parallel_master: 7727 case OMPD_parallel_sections: 7728 case OMPD_for_simd: 7729 case OMPD_parallel_for_simd: 7730 case OMPD_cancel: 7731 case OMPD_cancellation_point: 7732 case OMPD_ordered: 7733 case OMPD_threadprivate: 7734 case OMPD_allocate: 7735 case OMPD_task: 7736 case OMPD_simd: 7737 case OMPD_sections: 7738 case OMPD_section: 7739 case OMPD_single: 7740 case OMPD_master: 7741 case OMPD_critical: 7742 case OMPD_taskyield: 7743 case OMPD_barrier: 7744 case OMPD_taskwait: 7745 case OMPD_taskgroup: 7746 case OMPD_atomic: 7747 case OMPD_flush: 7748 case OMPD_depobj: 7749 case OMPD_scan: 7750 case OMPD_teams: 7751 case OMPD_target_data: 7752 case OMPD_target_exit_data: 7753 case OMPD_target_enter_data: 7754 case OMPD_distribute: 7755 case OMPD_distribute_simd: 7756 case OMPD_distribute_parallel_for: 7757 case OMPD_distribute_parallel_for_simd: 7758 case OMPD_teams_distribute: 7759 case OMPD_teams_distribute_simd: 7760 case OMPD_teams_distribute_parallel_for: 7761 case 
OMPD_teams_distribute_parallel_for_simd: 7762 case OMPD_target_update: 7763 case OMPD_declare_simd: 7764 case OMPD_declare_variant: 7765 case OMPD_begin_declare_variant: 7766 case OMPD_end_declare_variant: 7767 case OMPD_declare_target: 7768 case OMPD_end_declare_target: 7769 case OMPD_declare_reduction: 7770 case OMPD_declare_mapper: 7771 case OMPD_taskloop: 7772 case OMPD_taskloop_simd: 7773 case OMPD_master_taskloop: 7774 case OMPD_master_taskloop_simd: 7775 case OMPD_parallel_master_taskloop: 7776 case OMPD_parallel_master_taskloop_simd: 7777 case OMPD_requires: 7778 case OMPD_unknown: 7779 break; 7780 } 7781 llvm_unreachable("Unsupported directive kind."); 7782 } 7783 7784 namespace { 7785 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7786 7787 // Utility to handle information from clauses associated with a given 7788 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7789 // It provides a convenient interface to obtain the information and generate 7790 // code for that information. 7791 class MappableExprsHandler { 7792 public: 7793 /// Values for bit flags used to specify the mapping type for 7794 /// offloading. 7795 enum OpenMPOffloadMappingFlags : uint64_t { 7796 /// No flags 7797 OMP_MAP_NONE = 0x0, 7798 /// Allocate memory on the device and move data from host to device. 7799 OMP_MAP_TO = 0x01, 7800 /// Allocate memory on the device and move data from device to host. 7801 OMP_MAP_FROM = 0x02, 7802 /// Always perform the requested mapping action on the element, even 7803 /// if it was already mapped before. 7804 OMP_MAP_ALWAYS = 0x04, 7805 /// Delete the element from the device environment, ignoring the 7806 /// current reference count associated with the element. 7807 OMP_MAP_DELETE = 0x08, 7808 /// The element being mapped is a pointer-pointee pair; both the 7809 /// pointer and the pointee should be mapped. 
7810 OMP_MAP_PTR_AND_OBJ = 0x10, 7811 /// This flags signals that the base address of an entry should be 7812 /// passed to the target kernel as an argument. 7813 OMP_MAP_TARGET_PARAM = 0x20, 7814 /// Signal that the runtime library has to return the device pointer 7815 /// in the current position for the data being mapped. Used when we have the 7816 /// use_device_ptr clause. 7817 OMP_MAP_RETURN_PARAM = 0x40, 7818 /// This flag signals that the reference being passed is a pointer to 7819 /// private data. 7820 OMP_MAP_PRIVATE = 0x80, 7821 /// Pass the element to the device by value. 7822 OMP_MAP_LITERAL = 0x100, 7823 /// Implicit map 7824 OMP_MAP_IMPLICIT = 0x200, 7825 /// Close is a hint to the runtime to allocate memory close to 7826 /// the target device. 7827 OMP_MAP_CLOSE = 0x400, 7828 /// The 16 MSBs of the flags indicate whether the entry is member of some 7829 /// struct/class. 7830 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7831 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7832 }; 7833 7834 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7835 static unsigned getFlagMemberOffset() { 7836 unsigned Offset = 0; 7837 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7838 Remain = Remain >> 1) 7839 Offset++; 7840 return Offset; 7841 } 7842 7843 /// Class that associates information with a base pointer to be passed to the 7844 /// runtime library. 7845 class BasePointerInfo { 7846 /// The base pointer. 7847 llvm::Value *Ptr = nullptr; 7848 /// The base declaration that refers to this device pointer, or null if 7849 /// there is none. 
7850 const ValueDecl *DevPtrDecl = nullptr; 7851 7852 public: 7853 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7854 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7855 llvm::Value *operator*() const { return Ptr; } 7856 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7857 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7858 }; 7859 7860 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7861 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7862 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7863 7864 /// Map between a struct and the its lowest & highest elements which have been 7865 /// mapped. 7866 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7867 /// HE(FieldIndex, Pointer)} 7868 struct StructRangeInfoTy { 7869 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7870 0, Address::invalid()}; 7871 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7872 0, Address::invalid()}; 7873 Address Base = Address::invalid(); 7874 }; 7875 7876 private: 7877 /// Kind that defines how a device pointer has to be returned. 
7878 struct MapInfo { 7879 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7880 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7881 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7882 bool ReturnDevicePointer = false; 7883 bool IsImplicit = false; 7884 7885 MapInfo() = default; 7886 MapInfo( 7887 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7888 OpenMPMapClauseKind MapType, 7889 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7890 bool ReturnDevicePointer, bool IsImplicit) 7891 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7892 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7893 }; 7894 7895 /// If use_device_ptr is used on a pointer which is a struct member and there 7896 /// is no map information about it, then emission of that entry is deferred 7897 /// until the whole struct has been processed. 7898 struct DeferredDevicePtrEntryTy { 7899 const Expr *IE = nullptr; 7900 const ValueDecl *VD = nullptr; 7901 7902 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7903 : IE(IE), VD(VD) {} 7904 }; 7905 7906 /// The target directive from where the mappable clauses were extracted. It 7907 /// is either a executable directive or a user-defined mapper directive. 7908 llvm::PointerUnion<const OMPExecutableDirective *, 7909 const OMPDeclareMapperDecl *> 7910 CurDir; 7911 7912 /// Function the directive is being generated for. 7913 CodeGenFunction &CGF; 7914 7915 /// Set of all first private variables in the current directive. 7916 /// bool data is set to true if the variable is implicitly marked as 7917 /// firstprivate, false otherwise. 7918 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7919 7920 /// Map between device pointer declarations and their expression components. 7921 /// The key value for declarations in 'this' is null. 
7922 llvm::DenseMap< 7923 const ValueDecl *, 7924 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7925 DevPointersMap; 7926 7927 llvm::Value *getExprTypeSize(const Expr *E) const { 7928 QualType ExprTy = E->getType().getCanonicalType(); 7929 7930 // Calculate the size for array shaping expression. 7931 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7932 llvm::Value *Size = 7933 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7934 for (const Expr *SE : OAE->getDimensions()) { 7935 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7936 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7937 CGF.getContext().getSizeType(), 7938 SE->getExprLoc()); 7939 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7940 } 7941 return Size; 7942 } 7943 7944 // Reference types are ignored for mapping purposes. 7945 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7946 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7947 7948 // Given that an array section is considered a built-in type, we need to 7949 // do the calculation based on the length of the section instead of relying 7950 // on CGF.getTypeSize(E->getType()). 7951 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7952 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7953 OAE->getBase()->IgnoreParenImpCasts()) 7954 .getCanonicalType(); 7955 7956 // If there is no length associated with the expression and lower bound is 7957 // not specified too, that means we are using the whole length of the 7958 // base. 
7959 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7960 !OAE->getLowerBound()) 7961 return CGF.getTypeSize(BaseTy); 7962 7963 llvm::Value *ElemSize; 7964 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7965 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7966 } else { 7967 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7968 assert(ATy && "Expecting array type if not a pointer type."); 7969 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7970 } 7971 7972 // If we don't have a length at this point, that is because we have an 7973 // array section with a single element. 7974 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7975 return ElemSize; 7976 7977 if (const Expr *LenExpr = OAE->getLength()) { 7978 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7979 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7980 CGF.getContext().getSizeType(), 7981 LenExpr->getExprLoc()); 7982 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7983 } 7984 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7985 OAE->getLowerBound() && "expected array_section[lb:]."); 7986 // Size = sizetype - lb * elemtype; 7987 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7988 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7989 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7990 CGF.getContext().getSizeType(), 7991 OAE->getLowerBound()->getExprLoc()); 7992 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7993 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7994 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7995 LengthVal = CGF.Builder.CreateSelect( 7996 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7997 return LengthVal; 7998 } 7999 return CGF.getTypeSize(ExprTy); 8000 } 8001 8002 /// Return the corresponding bits for a given map clause modifier. 
Add 8003 /// a flag marking the map as a pointer if requested. Add a flag marking the 8004 /// map as the first one of a series of maps that relate to the same map 8005 /// expression. 8006 OpenMPOffloadMappingFlags getMapTypeBits( 8007 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 8008 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 8009 OpenMPOffloadMappingFlags Bits = 8010 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 8011 switch (MapType) { 8012 case OMPC_MAP_alloc: 8013 case OMPC_MAP_release: 8014 // alloc and release is the default behavior in the runtime library, i.e. 8015 // if we don't pass any bits alloc/release that is what the runtime is 8016 // going to do. Therefore, we don't need to signal anything for these two 8017 // type modifiers. 8018 break; 8019 case OMPC_MAP_to: 8020 Bits |= OMP_MAP_TO; 8021 break; 8022 case OMPC_MAP_from: 8023 Bits |= OMP_MAP_FROM; 8024 break; 8025 case OMPC_MAP_tofrom: 8026 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 8027 break; 8028 case OMPC_MAP_delete: 8029 Bits |= OMP_MAP_DELETE; 8030 break; 8031 case OMPC_MAP_unknown: 8032 llvm_unreachable("Unexpected map type!"); 8033 } 8034 if (AddPtrFlag) 8035 Bits |= OMP_MAP_PTR_AND_OBJ; 8036 if (AddIsTargetParamFlag) 8037 Bits |= OMP_MAP_TARGET_PARAM; 8038 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 8039 != MapModifiers.end()) 8040 Bits |= OMP_MAP_ALWAYS; 8041 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 8042 != MapModifiers.end()) 8043 Bits |= OMP_MAP_CLOSE; 8044 return Bits; 8045 } 8046 8047 /// Return true if the provided expression is a final array section. A 8048 /// final array section, is one whose length can't be proved to be one. 8049 bool isFinalArraySectionExpression(const Expr *E) const { 8050 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 8051 8052 // It is not an array section and therefore not a unity-size one. 
8053 if (!OASE) 8054 return false; 8055 8056 // An array section with no colon always refer to a single element. 8057 if (OASE->getColonLoc().isInvalid()) 8058 return false; 8059 8060 const Expr *Length = OASE->getLength(); 8061 8062 // If we don't have a length we have to check if the array has size 1 8063 // for this dimension. Also, we should always expect a length if the 8064 // base type is pointer. 8065 if (!Length) { 8066 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 8067 OASE->getBase()->IgnoreParenImpCasts()) 8068 .getCanonicalType(); 8069 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 8070 return ATy->getSize().getSExtValue() != 1; 8071 // If we don't have a constant dimension length, we have to consider 8072 // the current section as having any size, so it is not necessarily 8073 // unitary. If it happen to be unity size, that's user fault. 8074 return true; 8075 } 8076 8077 // Check if the length evaluates to 1. 8078 Expr::EvalResult Result; 8079 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 8080 return true; // Can have more that size 1. 8081 8082 llvm::APSInt ConstLength = Result.Val.getInt(); 8083 return ConstLength.getSExtValue() != 1; 8084 } 8085 8086 /// Generate the base pointers, section pointers, sizes and map type 8087 /// bits for the provided map type, map modifier, and expression components. 8088 /// \a IsFirstComponent should be set to true if the provided set of 8089 /// components is the first associated with a capture. 
void generateInfoForComponentList(
    OpenMPMapClauseKind MapType,
    ArrayRef<OpenMPMapModifierKind> MapModifiers,
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
    StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
    bool IsImplicit,
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedElements = llvm::None) const {
  // Overview: the components are walked from the base expression towards the
  // complete expression (reverse iteration over Components). Each emitted
  // entry appends one element to each of BasePointers, Pointers, Sizes and
  // Types. PartialStruct is updated with the lowest/highest mapped field once
  // a member expression has been seen. A non-empty OverlappedElements list
  // switches to the path that emits the pieces of the base element located
  // between the overlapped components.
  //
  // The following summarizes what has to be generated for each map and the
  // types below. The generated information is expressed in this order:
  // base pointer, section pointer, size, flags
  // (to add to the ones that come from the map type and modifier).
  //
  // double d;
  // int i[100];
  // float *p;
  //
  // struct S1 {
  //   int i;
  //   float f[50];
  // }
  // struct S2 {
  //   int i;
  //   float f[50];
  //   S1 s;
  //   double *p;
  //   struct S2 *ps;
  // }
  // S2 s;
  // S2 *ps;
  //
  // map(d)
  // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
  //
  // map(i)
  // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(i[1:23])
  // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(p)
  // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
  //
  // map(p[1:24])
  // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s)
  // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
  //
  // map(s.i)
  // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(s.s.f)
  // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s.p)
  // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
  //
  // map(to: s.p[:22])
  // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
  // &(s.p), &(s.p[0]), 22*sizeof(double),
  //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
  // (*) alloc space for struct members, only this is a target parameter
  // (**) map the pointer (nothing to be mapped in this example) (the compiler
  //      optimizes this entry out, same in the examples below)
  // (***) map the pointee (map: to)
  //
  // map(s.ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: s.ps->s.i)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(to: s.ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(s.ps->ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: s.ps->ps->s.f[:22])
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(ps)
  // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(ps->i)
  // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(ps->s.f)
  // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->p)
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
  //
  // map(to: ps->p[:22])
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
  // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->ps->s.i)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(from: ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(ps->ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: ps->ps->ps->s.f[:22])
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(to: s.f[:22]) map(from: s.p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
  //     sizeof(double*) (*), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
  // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  // (*) allocate contiguous space needed to fit all mapped members even if
  //     we allocate space for members not mapped (in this example,
  //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
  //     them as well because they fall between &s.f[0] and &s.p)
  //
  // map(from: s.f[:22]) map(to: ps->p[:33])
  // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 2nd element in the list of
  //     arguments, hence MEMBER_OF(2)
  //
  // map(from: s.f[:22], s.s) map(to: ps->p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
  // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 4th element in the list
  //     of arguments, hence MEMBER_OF(4)

  // Track if the map information being generated is the first for a capture.
  bool IsCaptureFirstInfo = IsFirstComponentList;
  // When the variable is on a declare target link or in a to clause with
  // unified memory, a reference is needed to hold the host/device address
  // of the variable.
  bool RequiresReference = false;

  // Scan the components from the base to the complete expression.
  auto CI = Components.rbegin();
  auto CE = Components.rend();
  auto I = CI;

  // Track if the map information being generated is the first for a list of
  // components.
  bool IsExpressionFirstInfo = true;
  Address BP = Address::invalid();
  const Expr *AssocExpr = I->getAssociatedExpression();
  const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
  const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
  const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

  if (isa<MemberExpr>(AssocExpr)) {
    // The base is the 'this' pointer. The content of the pointer is going
    // to be the base of the field being mapped.
    BP = CGF.LoadCXXThisAddress();
  } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
             (OASE &&
              isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
    // Array subscript or array section applied directly to 'this'.
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
  } else if (OAShE &&
             isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
    // Array shaping applied directly to 'this': the base is the value of the
    // shaped pointer expression.
    BP = Address(
        CGF.EmitScalarExpr(OAShE->getBase()),
        CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
  } else {
    // The base is the reference to the variable.
    // BP = &Var.
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    if (const auto *VD =
            dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
      if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
              OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
        if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
          RequiresReference = true;
          BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
        }
      }
    }

    // If the variable is a pointer and is being dereferenced (i.e. is not
    // the last component), the base has to be the pointer itself, not its
    // reference. References are ignored for mapping purposes.
    QualType Ty =
        I->getAssociatedDeclaration()->getType().getNonReferenceType();
    if (Ty->isAnyPointerType() && std::next(I) != CE) {
      BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

      // We do not need to generate individual map information for the
      // pointer, it can be associated with the combined storage.
      ++I;
    }
  }

  // Track whether a component of the list should be marked as MEMBER_OF some
  // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
  // in a component list should be marked as MEMBER_OF, all subsequent entries
  // do not belong to the base struct. E.g.
  // struct S2 s;
  // s.ps->ps->ps->f[:]
  //   (1) (2) (3) (4)
  // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
  // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
  // is the pointee of ps(2) which is not member of struct s, so it should not
  // be marked as such (it is still PTR_AND_OBJ).
  // The variable is initialized to false so that PTR_AND_OBJ entries which
  // are not struct members are not considered (e.g. array of pointers to
  // data).
  bool ShouldBeMemberOf = false;

  // Variable keeping track of whether or not we have encountered a component
  // in the component list which is a member expression. Useful when we have a
  // pointer or a final array section, in which case it is the previous
  // component in the list which tells us whether we have a member expression.
  // E.g. X.f[:]
  // While processing the final array section "[:]" it is "f" which tells us
  // whether we are dealing with a member of a declared struct.
  const MemberExpr *EncounteredME = nullptr;

  for (; I != CE; ++I) {
    // If the current component is member of a struct (parent struct) mark it.
    if (!EncounteredME) {
      EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
      // If we encounter a PTR_AND_OBJ entry from now on it should be marked
      // as MEMBER_OF the parent struct.
      if (EncounteredME)
        ShouldBeMemberOf = true;
    }

    auto Next = std::next(I);

    // We need to generate the addresses and sizes if this is the last
    // component, if the component is a pointer or if it is an array section
    // whose length can't be proved to be one. If this is a pointer, it
    // becomes the base address for the following components.

    // A final array section is one whose length can't be proved to be one.
    bool IsFinalArraySection =
        isFinalArraySectionExpression(I->getAssociatedExpression());

    // Get information on whether the element is a pointer. Have to do a
    // special treatment for array sections given that they are built-in
    // types.
    // Note: OASE and OAShE below describe the current component and shadow
    // the variables of the same name computed for the base component above.
    const auto *OASE =
        dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
    const auto *OAShE =
        dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
    const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
    const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
    bool IsPointer =
        OAShE ||
        (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                     .getCanonicalType()
                     ->isAnyPointerType()) ||
        I->getAssociatedExpression()->getType()->isAnyPointerType();
    // A pointer-typed component that is neither a unary nor a binary
    // operator expression.
    bool IsNonDerefPointer = IsPointer && !UO && !BO;

    if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
      // If this is not the last component, we expect the pointer to be
      // associated with an array expression or member expression.
      assert((Next == CE ||
              isa<MemberExpr>(Next->getAssociatedExpression()) ||
              isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
              isa<UnaryOperator>(Next->getAssociatedExpression()) ||
              isa<BinaryOperator>(Next->getAssociatedExpression())) &&
             "Unexpected expression");

      // LB is the section pointer (lower bound address) of this component.
      Address LB = Address::invalid();
      if (OAShE) {
        LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                     CGF.getContext().getTypeAlignInChars(
                         OAShE->getBase()->getType()));
      } else {
        LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                 .getAddress(CGF);
      }

      // If this component is a pointer inside the base struct then we don't
      // need to create any entry for it - it will be combined with the object
      // it is pointing to into a single PTR_AND_OBJ entry.
      bool IsMemberPointer =
          IsPointer && EncounteredME &&
          (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
           EncounteredME);
      if (!OverlappedElements.empty()) {
        // Handle base element with the info for overlapped elements.
        assert(!PartialStruct.Base.isValid() && "The base element is set.");
        assert(Next == CE &&
               "Expected last element for the overlapped elements.");
        assert(!IsPointer &&
               "Unexpected base element with the pointer type.");
        // Mark the whole struct as the struct that requires allocation on the
        // device.
        PartialStruct.LowestElem = {0, LB};
        CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
            I->getAssociatedExpression()->getType());
        // HB addresses the last byte of the base element.
        Address HB = CGF.Builder.CreateConstGEP(
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                            CGF.VoidPtrTy),
            TypeSize.getQuantity() - 1);
        PartialStruct.HighestElem = {
            std::numeric_limits<decltype(
                PartialStruct.HighestElem.first)>::max(),
            HB};
        PartialStruct.Base = BP;
        // Emit data for non-overlapped data.
        OpenMPOffloadMappingFlags Flags =
            OMP_MAP_MEMBER_OF |
            getMapTypeBits(MapType, MapModifiers, IsImplicit,
                           /*AddPtrFlag=*/false,
                           /*AddIsTargetParamFlag=*/false);
        LB = BP;
        llvm::Value *Size = nullptr;
        // Do bitcopy of all non-overlapped structure elements.
        for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                 Component : OverlappedElements) {
          Address ComponentLB = Address::invalid();
          // Use the first component that has an associated declaration; the
          // emitted entry covers the bytes from LB up to its address.
          for (const OMPClauseMappableExprCommon::MappableComponent &MC :
               Component) {
            if (MC.getAssociatedDeclaration()) {
              ComponentLB =
                  CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                      .getAddress(CGF);
              Size = CGF.Builder.CreatePtrDiff(
                  CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                  CGF.EmitCastToVoidPtr(LB.getPointer()));
              break;
            }
          }
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                    /*isSigned=*/true));
          Types.push_back(Flags);
          // Continue right after the overlapped component.
          LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
        }
        // Trailing piece: from the end of the last overlapped component up to
        // one past the last byte of the base element.
        BasePointers.push_back(BP.getPointer());
        Pointers.push_back(LB.getPointer());
        Size = CGF.Builder.CreatePtrDiff(
            CGF.EmitCastToVoidPtr(
                CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
            CGF.EmitCastToVoidPtr(LB.getPointer()));
        Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
        Types.push_back(Flags);
        break;
      }
      llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
      if (!IsMemberPointer) {
        BasePointers.push_back(BP.getPointer());
        Pointers.push_back(LB.getPointer());
        Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        OpenMPOffloadMappingFlags Flags = getMapTypeBits(
            MapType, MapModifiers, IsImplicit,
            !IsExpressionFirstInfo || RequiresReference,
            IsCaptureFirstInfo && !RequiresReference);

        if (!IsExpressionFirstInfo) {
          // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
          // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
          if (IsPointer)
            Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                       OMP_MAP_DELETE | OMP_MAP_CLOSE);

          if (ShouldBeMemberOf) {
            // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
            // should be later updated with the correct value of MEMBER_OF.
            Flags |= OMP_MAP_MEMBER_OF;
            // From now on, all subsequent PTR_AND_OBJ entries should not be
            // marked as MEMBER_OF.
            ShouldBeMemberOf = false;
          }
        }

        Types.push_back(Flags);
      }

      // If we have encountered a member expression so far, keep track of the
      // mapped member. If the parent is "*this", then the value declaration
      // is nullptr.
      if (EncounteredME) {
        const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
        unsigned FieldIndex = FD->getFieldIndex();

        // Update info about the lowest and highest elements for this struct
        if (!PartialStruct.Base.isValid()) {
          PartialStruct.LowestElem = {FieldIndex, LB};
          PartialStruct.HighestElem = {FieldIndex, LB};
          PartialStruct.Base = BP;
        } else if (FieldIndex < PartialStruct.LowestElem.first) {
          PartialStruct.LowestElem = {FieldIndex, LB};
        } else if (FieldIndex > PartialStruct.HighestElem.first) {
          PartialStruct.HighestElem = {FieldIndex, LB};
        }
      }

      // If we have a final array section, we are done with this expression.
      if (IsFinalArraySection)
        break;

      // The pointer becomes the base for the next element.
      if (Next != CE)
        BP = LB;

      IsExpressionFirstInfo = false;
      IsCaptureFirstInfo = false;
    }
  }
}

/// Return the adjusted map modifiers if the declaration a capture refers to
/// appears in a first-private clause. This is expected to be used only with
/// directives that start with 'target'.
///
/// For a capture whose variable is not in FirstPrivateDecls the result is
/// TO | FROM.
MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
  assert(Cap.capturesVariable() && "Expected capture by reference only!");

  // A first private variable captured by reference will use only the
  // 'private ptr' and 'map to' flag. Return the right flags if the captured
  // declaration is known as first-private in this handler.
  if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
    // Constant-qualified variables captured by reference: ALWAYS | TO.
    if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
        Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
      return MappableExprsHandler::OMP_MAP_ALWAYS |
             MappableExprsHandler::OMP_MAP_TO;
    // Pointer-typed variables: TO | PTR_AND_OBJ.
    if (Cap.getCapturedVar()->getType()->isAnyPointerType())
      return MappableExprsHandler::OMP_MAP_TO |
             MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
    return MappableExprsHandler::OMP_MAP_PRIVATE |
           MappableExprsHandler::OMP_MAP_TO;
  }
  return MappableExprsHandler::OMP_MAP_TO |
         MappableExprsHandler::OMP_MAP_FROM;
}

/// Build the MEMBER_OF flag value for the entry at zero-based index
/// \p Position: the stored value is Position + 1, placed in the MEMBER_OF
/// bit field.
static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
  // Shift (Position + 1) left by getFlagMemberOffset() bits.
  return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                << getFlagMemberOffset());
}

/// Replace the MEMBER_OF bits of \p Flags with \p MemberOfFlag. PTR_AND_OBJ
/// entries that do not carry the full placeholder in the MEMBER_OF field are
/// left untouched.
static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                   OpenMPOffloadMappingFlags MemberOfFlag) {
  // If the entry is PTR_AND_OBJ but has not been marked with the special
  // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
  // marked as MEMBER_OF.
  if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
      ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
    return;

  // Reset the placeholder value to prepare the flag for the assignment of the
  // proper MEMBER_OF value.
  Flags &= ~OMP_MAP_MEMBER_OF;
  Flags |= MemberOfFlag;
}

/// Append to \p Layout the fields of \p RD, ordered by their index in the
/// record's LLVM struct type, recursing into non-empty base classes.
/// Bitfields and zero-size fields are not added.
///
/// \param AsBase if true, use the base-subobject LLVM type of \p RD instead
/// of its complete-object LLVM type.
void getPlainLayout(const CXXRecordDecl *RD,
                    llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                    bool AsBase) const {
  const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

  llvm::StructType *St =
      AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

  // One slot per LLVM struct element; slots that are never assigned stay
  // null and are skipped when the layout is flattened below.
  unsigned NumElements = St->getNumElements();
  llvm::SmallVector<
      llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
      RecordLayout(NumElements);

  // Fill bases.
  for (const auto &I : RD->bases()) {
    if (I.isVirtual())
      continue;
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty() || CGF.getContext()
                               .getASTRecordLayout(Base)
                               .getNonVirtualSize()
                               .isZero())
      continue;

    unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in virtual bases.
  for (const auto &I : RD->vbases()) {
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty())
      continue;
    unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
    // Keep whatever already occupies this slot.
    if (RecordLayout[FieldIndex])
      continue;
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in all the fields.
  assert(!RD->isUnion() && "Unexpected union.");
  for (const auto *Field : RD->fields()) {
    // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
    // will fill in later.)
    if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
      unsigned FieldIndex = RL.getLLVMFieldNo(Field);
      RecordLayout[FieldIndex] = Field;
    }
  }
  // Flatten: bases are expanded recursively, fields are appended directly.
  for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
           &Data : RecordLayout) {
    if (Data.isNull())
      continue;
    if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
      getPlainLayout(Base, Layout, /*AsBase=*/true);
    else
      Layout.push_back(Data.get<const FieldDecl *>());
  }
}

public:
/// Constructor for an executable directive. Collects the firstprivate
/// declarations (explicit ones and those implied by uses_allocators clauses)
/// and the component lists of is_device_ptr clauses.
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {
  // Extract firstprivate clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
    for (const auto *D : C->varlists())
      FirstPrivateDecls.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
  // Extract implicit firstprivates from uses_allocators clauses.
  for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      // Prefer the traits variable when it is a plain declaration
      // reference; otherwise fall back to the allocator variable itself.
      if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
        FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                      /*Implicit=*/true);
      else if (const auto *VD = dyn_cast<VarDecl>(
                   cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                       ->getDecl()))
        FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
    }
  }
  // Extract device pointer clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
    for (auto L : C->component_lists())
      DevPointersMap[L.first].push_back(L.second);
}

/// Constructor for the declare mapper directive.
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {}

/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
8684 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 8685 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8686 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 8687 const StructRangeInfoTy &PartialStruct) const { 8688 // Base is the base of the struct 8689 BasePointers.push_back(PartialStruct.Base.getPointer()); 8690 // Pointer is the address of the lowest element 8691 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 8692 Pointers.push_back(LB); 8693 // Size is (addr of {highest+1} element) - (addr of lowest element) 8694 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 8695 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8696 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8697 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8698 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8699 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8700 /*isSigned=*/false); 8701 Sizes.push_back(Size); 8702 // Map type is always TARGET_PARAM 8703 Types.push_back(OMP_MAP_TARGET_PARAM); 8704 // Remove TARGET_PARAM flag from the first element 8705 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8706 8707 // All other current entries will be MEMBER_OF the combined entry 8708 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8709 // 0xFFFF in the MEMBER_OF field). 8710 OpenMPOffloadMappingFlags MemberOfFlag = 8711 getMemberOfFlag(BasePointers.size() - 1); 8712 for (auto &M : CurTypes) 8713 setCorrectMemberOfFlag(M, MemberOfFlag); 8714 } 8715 8716 /// Generate all the base pointers, section pointers, sizes and map 8717 /// types for the extracted mappable expressions. Also, for each item that 8718 /// relates with a device pointer, a pair of the relevant declaration and 8719 /// index where it occurs is appended to the device pointers info array. 
void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                     MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                     MapFlagsArrayTy &Types) const {
  // Overview: collect the component lists of all map/to/from clauses of the
  // current executable directive, reconcile them with the use_device_ptr
  // clauses, and then append the generated entries to the four output
  // arrays. This function requires CurDir to hold an OMPExecutableDirective
  // (asserted below).

  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. The key is the canonical declaration of D, or nullptr when D is
  // null.
  auto &&InfoGen = [&Info](
      const ValueDecl *D,
      OMPClauseMappableExprCommon::MappableExprComponentListRef L,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit) {
    const ValueDecl *VD =
        D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
    Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                          IsImplicit);
  };

  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // 'map' clauses keep their own map type and modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
              /*ReturnDevicePointer=*/false, C->isImplicit());
    }
  // 'to' clauses are recorded as map type 'to' without modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
              /*ReturnDevicePointer=*/false, C->isImplicit());
    }
  // 'from' clauses are recorded as map type 'from' without modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
              /*ReturnDevicePointer=*/false, C->isImplicit());
    }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;

  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
    for (const auto L : C->component_lists()) {
      assert(!L.second.empty() && "Not expecting empty list of components!");
      const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = L.second.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto CI = std::find_if(
            It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        if (CI != It->second.end()) {
          CI->ReturnDevicePointer = true;
          continue;
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
        DeferredInfo[nullptr].emplace_back(IE, VD);
      } else {
        // Not a struct member: emit the entry right away, directly into the
        // output arrays. The loaded pointer value is used both as base
        // pointer and as pointer, with a zero size.
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        BasePointers.emplace_back(Ptr, VD);
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
      }
    }
  }

  // Generate the entries declaration by declaration, in the insertion order
  // of Info.
  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary versions of arrays
    MapBaseValuesArrayTy CurBasePointers;
    MapValuesArrayTy CurPointers;
    MapValuesArrayTy CurSizes;
    MapFlagsArrayTy CurTypes;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");

      // Remember the current base pointer index.
      unsigned CurrentBasePointersIdx = CurBasePointers.size();
      generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                   CurBasePointers, CurPointers, CurSizes,
                                   CurTypes, PartialStruct,
                                   IsFirstComponentList, L.IsImplicit);

      // If this entry relates with a device pointer, set the relevant
      // declaration and add the 'return pointer' flag.
      if (L.ReturnDevicePointer) {
        assert(CurBasePointers.size() > CurrentBasePointersIdx &&
               "Unexpected number of mapped base pointers.");

        const ValueDecl *RelevantVD =
            L.Components.back().getAssociatedDeclaration();
        assert(RelevantVD &&
               "No relevant declaration related with device pointer??");

        CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
        CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
      }
      IsFirstComponentList = false;
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr.
    auto CI = DeferredInfo.find(M.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        // Note that the lvalue for L.IE is emitted twice here: once for the
        // base pointer (its address) and once for the loaded pointer value.
        llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
            this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
        CurBasePointers.emplace_back(BasePtr, L.VD);
        CurPointers.push_back(Ptr);
        CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
        // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
        // value MEMBER_OF=FFFF so that the entry is later updated with the
        // correct value of MEMBER_OF.
        CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                           OMP_MAP_MEMBER_OF);
      }
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry. It is pushed
    // directly to the output arrays, i.e. ahead of the entries of this
    // declaration that are appended below.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                        PartialStruct);

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    Types.append(CurTypes.begin(), CurTypes.end());
  }
}

/// Generate all the base pointers, section pointers, sizes and map types for
/// the extracted map clauses of user-defined mapper.
///
/// Requires the handler to have been constructed for a declare mapper
/// directive (asserted). Every clause of the mapper is cast to OMPMapClause.
/// Unlike generateAllInfo, no to/from or use_device_ptr clauses are
/// inspected here.
void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. The key is the canonical declaration of D, or nullptr when D is
  // null.
  auto &&InfoGen = [&Info](
      const ValueDecl *D,
      OMPClauseMappableExprCommon::MappableExprComponentListRef L,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit) {
    const ValueDecl *VD =
        D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
    Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                          IsImplicit);
  };

  for (const auto *C : CurMapperDir->clauselists()) {
    const auto *MC = cast<OMPMapClause>(C);
    for (const auto L : MC->component_lists()) {
      InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
              /*ReturnDevicePointer=*/false, MC->isImplicit());
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary versions of arrays
    MapBaseValuesArrayTy CurBasePointers;
    MapValuesArrayTy CurPointers;
    MapValuesArrayTy CurSizes;
    MapFlagsArrayTy CurTypes;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");
      generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                   CurBasePointers, CurPointers, CurSizes,
                                   CurTypes, PartialStruct,
                                   IsFirstComponentList, L.IsImplicit);
      IsFirstComponentList = false;
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                        PartialStruct);

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    Types.append(CurTypes.begin(), CurTypes.end());
  }
}

/// Emit capture info for lambdas for variables captured by reference.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
    MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
    MapFlagsArrayTy &Types,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  // Nothing to do unless the (canonical, non-reference) type of VD is a
  // lambda closure class.
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  // Arg is treated as the address of the lambda object.
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  // Entry for a captured 'this': pointer-sized, and registered in
  // LambdaPointers (field address -> lambda address) so that
  // adjustMemberOfForLambdaCaptures can later find the parent entry.
  if (ThisCapture) {
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    BasePointers.push_back(ThisLVal.getPointer(CGF));
    Pointers.push_back(ThisLValVal.getPointer(CGF));
    Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    // Note: this VD is the captured variable and shadows the parameter VD
    // (the lambda itself) for the rest of the loop body.
    const VarDecl *VD = LC.getCapturedVar();
    // Only by-reference captures and captures of pointer type get an entry.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the referenced type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(VarLVal.getPointer(CGF));
      Pointers.push_back(VarLValVal.getPointer(CGF));
      Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer captured by copy: the pointer is the loaded value of the
      // field and the size is zero.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(VarLVal.getPointer(CGF));
      Pointers.push_back(VarRVal.getScalarVal());
      Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    // The MEMBER_OF field set here is fixed up later by
    // adjustMemberOfForLambdaCaptures, which matches on exactly this flag
    // combination.
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
}

/// Set correct indices for lambdas captures.
///
/// For every entry whose type is exactly the flag combination pushed by
/// generateInfoForLambdaCaptures, look up the address of the owning lambda
/// in \a LambdaPointers, search backwards from the entry for the closest
/// earlier entry whose pointer is that address, and set the MEMBER_OF field
/// of the entry to the index found.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    // Scan the entries preceding I, from the closest one down to index 0.
    int TgtIdx = -1;
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
///
/// \a PartialStruct is forwarded to generateInfoForComponentList for every
/// component list processed here.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg,
                            MapBaseValuesArrayTy &BasePointers,
                            MapValuesArrayTy &Pointers,
                            MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // Canonical declaration of the captured variable; nullptr stands for a
  // capture of 'this'.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    BasePointers.emplace_back(Arg, VD);
    Pointers.push_back(Arg);
    Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
    return;
  }

  // One MapData per component list of VD:
  // (components, map type, map modifiers, is-implicit).
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    for (const auto L : C->decl_component_lists(VD)) {
      assert(L.first == VD &&
             "We got information for the wrong declaration??");
      assert(!L.second.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit());
    }
  }

  // Find overlapping elements (including the offset from the base element).
  // The map is keyed by the address of the MapData inside DeclComponentLists,
  // which is not modified after this point.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ++Count;
    // Compare L only against the lists that follow it, so every pair is
    // examined once.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
      // Walk both lists from their last component towards the first while
      // the components agree.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head of
      // the components list.
      if (CI == CE || SI == SE) {
        assert((CI != CE || SI != SE) &&
               "Unexpected full match of the mapping components.");
        // The list that was exhausted is the base; the other one is recorded
        // as a sub-component overlapping it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  // NOTE(review): VD is nullptr when the capture is 'this', and RD below is
  // dereferenced without a null check; presumably overlapping lists only
  // occur for captured variables of record type - confirm.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    if (const auto *CRD =
            VD->getType().getCanonicalType()->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          // Skip the common part of both lists, as done above.
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // Otherwise order by the first differing field: by field index
          // when both fields have the same parent, else by which of the two
          // is found first in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          const auto It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Go through all of the elements with the overlapped elements. Each of
  // them is handled as the first component list associated with the capture,
  // because the mapping flags depend on it.
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    bool IsFirstComponentList = true;
    generateInfoForComponentList(MapType, MapModifiers, Components,
                                 BasePointers, Pointers, Sizes, Types,
                                 PartialStruct, IsFirstComponentList,
                                 IsImplicit, OverlappedComponents);
  }
  // Go through other elements without overlapped elements. The first of
  // them counts as the first component list only if no overlapped element
  // was emitted above.
  bool IsFirstComponentList = OverlappedData.empty();
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit);
    IsFirstComponentList = false;
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated with the declare target link variables.
void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                      MapValuesArrayTy &Pointers,
                                      MapValuesArrayTy &Sizes,
                                      MapFlagsArrayTy &Types) const {
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // Map other list items in the map clause which are not captured variables
  // but "declare target link" global variables.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    for (const auto L : C->component_lists()) {
      // Skip lists without an associated declaration and declarations that
      // are not variables.
      if (!L.first)
        continue;
      const auto *VD = dyn_cast<VarDecl>(L.first);
      if (!VD)
        continue;
      // Only variables marked 'declare target link' are handled, and none at
      // all when the runtime reports 'requires unified_shared_memory'.
      llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
          OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
      if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
          !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
        continue;
      StructRangeInfoTy PartialStruct;
      generateInfoForComponentList(
          C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
          Pointers, Sizes, Types, PartialStruct,
          /*IsFirstComponentList=*/true, C->isImplicit());
      assert(!PartialStruct.Base.isValid() &&
             "No partial structs for declare target link expected.");
    }
  }
}

/// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
///
/// Exactly one element is appended to each of \a CurBasePointers,
/// \a CurPointers, \a CurSizes and \a CurMapTypes. The appended map type
/// always carries OMP_MAP_TARGET_PARAM, and additionally OMP_MAP_IMPLICIT
/// unless FirstPrivateDecls records the captured variable as not implicit.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                            const FieldDecl &RI, llvm::Value *CV,
                            MapBaseValuesArrayTy &CurBasePointers,
                            MapValuesArrayTy &CurPointers,
                            MapValuesArrayTy &CurSizes,
                            MapFlagsArrayTy &CurMapTypes) const {
  bool IsImplicit = true;
  // Do the default mapping.
  if (CI.capturesThis()) {
    // 'this': map the pointee object, sized by the pointee type of the
    // field.
    CurBasePointers.push_back(CV);
    CurPointers.push_back(CV);
    const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
    CurSizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                  CGF.Int64Ty, /*isSigned=*/true));
    // Default map type.
    CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
  } else if (CI.capturesVariableByCopy()) {
    CurBasePointers.push_back(CV);
    CurPointers.push_back(CV);
    if (!RI.getType()->isAnyPointerType()) {
      // We have to signal to the runtime captures passed by value that are
      // not pointers.
      CurMapTypes.push_back(OMP_MAP_LITERAL);
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointers are implicitly mapped with a zero size and no flags
      // (other than first map that is added for all implicit maps).
      CurMapTypes.push_back(OMP_MAP_NONE);
      CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
    }
    // A variable recorded in FirstPrivateDecls overrides the implicit flag.
    const VarDecl *VD = CI.getCapturedVar();
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  } else {
    assert(CI.capturesVariable() && "Expected captured reference.");
    // Captured by reference: the size is that of the referenced type.
    const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
    QualType ElementType = PtrTy->getPointeeType();
    CurSizes.push_back(CGF.Builder.CreateIntCast(
        CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
    // The default map type for a scalar/complex type is 'to' because by
    // default the value doesn't have to be retrieved. For an aggregate
    // type, the default is 'tofrom'.
    CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
    const VarDecl *VD = CI.getCapturedVar();
    auto I = FirstPrivateDecls.find(VD);
    if (I != FirstPrivateDecls.end() &&
        VD->getType().isConstant(CGF.getContext())) {
      // Constant firstprivate variable: obtain a global copy from the
      // runtime and use it instead of the captured value.
      llvm::Constant *Addr =
          CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
      // Copy the value of the original variable to the new global copy.
      CGF.Builder.CreateMemCpy(
          CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
          Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
          CurSizes.back(), /*IsVolatile=*/false);
      // Use new global variable as the base pointers.
      CurBasePointers.push_back(Addr);
      CurPointers.push_back(Addr);
    } else {
      CurBasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: the pointer entry is
        // the pointer value loaded through the reference.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CurPointers.push_back(PtrAddr.getPointer());
      } else {
        CurPointers.push_back(CV);
      }
    }
    if (I != FirstPrivateDecls.end())
      IsImplicit = I->getSecond();
  }
  // Every default map produces a single argument which is a target parameter.
  CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

  // Add flag stating this is an implicit map.
  if (IsImplicit)
    CurMapTypes.back() |= OMP_MAP_IMPLICIT;
}
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  // The results are stored in Info: NumberOfPtrs, BasePointersArray,
  // PointersArray, SizesArray, MapTypesArray and, when device pointer
  // information is required, CaptureDeviceAddrMap.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    // Base pointers and pointers always go into temporaries of type
    // 'void *[NumberOfPtrs]' that are filled by the loop below.
    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant. The flags are first
    // copied into a vector of uint64_t, which is the element type of the
    // constant data array.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store every base pointer, pointer and (if needed) size into its slot.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      // The slot address is cast to a pointer to the type of the stored
      // value, so the value can be stored without converting it.
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember the slot of entries that have a device pointer declaration
      // attached.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      // Sizes are stored only when the sizes array is a temporary; the
      // constant global already contains them.
      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
///
/// When \a Info.NumberOfPtrs is non-zero, each output argument is set to the
/// address of element 0 of the corresponding array stored in \a Info.
/// Otherwise each output argument is set to a null pointer constant
/// (of type CGM.VoidPtrPtrTy for the two pointer arrays, and pointer to
/// CGM.Int64Ty for the sizes and map types).
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// Check for inner distribute directive.
9512 static const OMPExecutableDirective * 9513 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9514 const auto *CS = D.getInnermostCapturedStmt(); 9515 const auto *Body = 9516 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9517 const Stmt *ChildStmt = 9518 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9519 9520 if (const auto *NestedDir = 9521 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9522 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9523 switch (D.getDirectiveKind()) { 9524 case OMPD_target: 9525 if (isOpenMPDistributeDirective(DKind)) 9526 return NestedDir; 9527 if (DKind == OMPD_teams) { 9528 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9529 /*IgnoreCaptured=*/true); 9530 if (!Body) 9531 return nullptr; 9532 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9533 if (const auto *NND = 9534 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9535 DKind = NND->getDirectiveKind(); 9536 if (isOpenMPDistributeDirective(DKind)) 9537 return NND; 9538 } 9539 } 9540 return nullptr; 9541 case OMPD_target_teams: 9542 if (isOpenMPDistributeDirective(DKind)) 9543 return NestedDir; 9544 return nullptr; 9545 case OMPD_target_parallel: 9546 case OMPD_target_simd: 9547 case OMPD_target_parallel_for: 9548 case OMPD_target_parallel_for_simd: 9549 return nullptr; 9550 case OMPD_target_teams_distribute: 9551 case OMPD_target_teams_distribute_simd: 9552 case OMPD_target_teams_distribute_parallel_for: 9553 case OMPD_target_teams_distribute_parallel_for_simd: 9554 case OMPD_parallel: 9555 case OMPD_for: 9556 case OMPD_parallel_for: 9557 case OMPD_parallel_master: 9558 case OMPD_parallel_sections: 9559 case OMPD_for_simd: 9560 case OMPD_parallel_for_simd: 9561 case OMPD_cancel: 9562 case OMPD_cancellation_point: 9563 case OMPD_ordered: 9564 case OMPD_threadprivate: 9565 case OMPD_allocate: 9566 case OMPD_task: 9567 case OMPD_simd: 9568 case OMPD_sections: 9569 
case OMPD_section: 9570 case OMPD_single: 9571 case OMPD_master: 9572 case OMPD_critical: 9573 case OMPD_taskyield: 9574 case OMPD_barrier: 9575 case OMPD_taskwait: 9576 case OMPD_taskgroup: 9577 case OMPD_atomic: 9578 case OMPD_flush: 9579 case OMPD_depobj: 9580 case OMPD_scan: 9581 case OMPD_teams: 9582 case OMPD_target_data: 9583 case OMPD_target_exit_data: 9584 case OMPD_target_enter_data: 9585 case OMPD_distribute: 9586 case OMPD_distribute_simd: 9587 case OMPD_distribute_parallel_for: 9588 case OMPD_distribute_parallel_for_simd: 9589 case OMPD_teams_distribute: 9590 case OMPD_teams_distribute_simd: 9591 case OMPD_teams_distribute_parallel_for: 9592 case OMPD_teams_distribute_parallel_for_simd: 9593 case OMPD_target_update: 9594 case OMPD_declare_simd: 9595 case OMPD_declare_variant: 9596 case OMPD_begin_declare_variant: 9597 case OMPD_end_declare_variant: 9598 case OMPD_declare_target: 9599 case OMPD_end_declare_target: 9600 case OMPD_declare_reduction: 9601 case OMPD_declare_mapper: 9602 case OMPD_taskloop: 9603 case OMPD_taskloop_simd: 9604 case OMPD_master_taskloop: 9605 case OMPD_master_taskloop_simd: 9606 case OMPD_parallel_master_taskloop: 9607 case OMPD_parallel_master_taskloop_simd: 9608 case OMPD_requires: 9609 case OMPD_unknown: 9610 llvm_unreachable("Unexpected directive."); 9611 } 9612 } 9613 9614 return nullptr; 9615 } 9616 9617 /// Emit the user-defined mapper function. The code generation follows the 9618 /// pattern in the example below. 9619 /// \code 9620 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9621 /// void *base, void *begin, 9622 /// int64_t size, int64_t type) { 9623 /// // Allocate space for an array section first. 9624 /// if (size > 1 && !maptype.IsDelete) 9625 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9626 /// size*sizeof(Ty), clearToFrom(type)); 9627 /// // Map members. 
9628 /// for (unsigned i = 0; i < size; i++) { 9629 /// // For each component specified by this mapper: 9630 /// for (auto c : all_components) { 9631 /// if (c.hasMapper()) 9632 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9633 /// c.arg_type); 9634 /// else 9635 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9636 /// c.arg_begin, c.arg_size, c.arg_type); 9637 /// } 9638 /// } 9639 /// // Delete the array section. 9640 /// if (size > 1 && maptype.IsDelete) 9641 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9642 /// size*sizeof(Ty), clearToFrom(type)); 9643 /// } 9644 /// \endcode 9645 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9646 CodeGenFunction *CGF) { 9647 if (UDMMap.count(D) > 0) 9648 return; 9649 ASTContext &C = CGM.getContext(); 9650 QualType Ty = D->getType(); 9651 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9652 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9653 auto *MapperVarDecl = 9654 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9655 SourceLocation Loc = D->getLocation(); 9656 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9657 9658 // Prepare mapper function arguments and attributes. 
9659 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9660 C.VoidPtrTy, ImplicitParamDecl::Other); 9661 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9662 ImplicitParamDecl::Other); 9663 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9664 C.VoidPtrTy, ImplicitParamDecl::Other); 9665 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9666 ImplicitParamDecl::Other); 9667 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9668 ImplicitParamDecl::Other); 9669 FunctionArgList Args; 9670 Args.push_back(&HandleArg); 9671 Args.push_back(&BaseArg); 9672 Args.push_back(&BeginArg); 9673 Args.push_back(&SizeArg); 9674 Args.push_back(&TypeArg); 9675 const CGFunctionInfo &FnInfo = 9676 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9677 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9678 SmallString<64> TyStr; 9679 llvm::raw_svector_ostream Out(TyStr); 9680 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9681 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9682 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9683 Name, &CGM.getModule()); 9684 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9685 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9686 // Start the mapper function code generation. 9687 CodeGenFunction MapperCGF(CGM); 9688 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9689 // Compute the starting and end addreses of array elements. 
9690 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9691 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9692 C.getPointerType(Int64Ty), Loc); 9693 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9694 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 9695 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 9696 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 9697 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9698 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9699 C.getPointerType(Int64Ty), Loc); 9700 // Prepare common arguments for array initiation and deletion. 9701 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9702 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9703 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9704 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9705 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9706 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9707 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9708 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9709 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9710 9711 // Emit array initiation if this is an array section and \p MapType indicates 9712 // that memory allocation is required. 9713 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9714 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9715 ElementSize, HeadBB, /*IsInit=*/true); 9716 9717 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9718 9719 // Emit the loop header block. 9720 MapperCGF.EmitBlock(HeadBB); 9721 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9722 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9723 // Evaluate whether the initial condition is satisfied. 
9724 llvm::Value *IsEmpty = 9725 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9726 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9727 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9728 9729 // Emit the loop body block. 9730 MapperCGF.EmitBlock(BodyBB); 9731 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9732 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9733 PtrPHI->addIncoming(PtrBegin, EntryBB); 9734 Address PtrCurrent = 9735 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9736 .getAlignment() 9737 .alignmentOfArrayElement(ElementSize)); 9738 // Privatize the declared variable of mapper to be the current array element. 9739 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9740 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9741 return MapperCGF 9742 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9743 .getAddress(MapperCGF); 9744 }); 9745 (void)Scope.Privatize(); 9746 9747 // Get map clause information. Fill up the arrays with all mapped variables. 9748 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9749 MappableExprsHandler::MapValuesArrayTy Pointers; 9750 MappableExprsHandler::MapValuesArrayTy Sizes; 9751 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9752 MappableExprsHandler MEHandler(*D, MapperCGF); 9753 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 9754 9755 // Call the runtime API __tgt_mapper_num_components to get the number of 9756 // pre-existing components. 9757 llvm::Value *OffloadingArgs[] = {Handle}; 9758 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9759 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 9760 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9761 PreviousSize, 9762 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9763 9764 // Fill up the runtime mapper handle for all components. 
9765 for (unsigned I = 0; I < BasePointers.size(); ++I) { 9766 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9767 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9768 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9769 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9770 llvm::Value *CurSizeArg = Sizes[I]; 9771 9772 // Extract the MEMBER_OF field from the map type. 9773 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9774 MapperCGF.EmitBlock(MemberBB); 9775 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 9776 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9777 OriMapType, 9778 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9779 llvm::BasicBlock *MemberCombineBB = 9780 MapperCGF.createBasicBlock("omp.member.combine"); 9781 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9782 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9783 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9784 // Add the number of pre-existing components to the MEMBER_OF field if it 9785 // is valid. 9786 MapperCGF.EmitBlock(MemberCombineBB); 9787 llvm::Value *CombinedMember = 9788 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9789 // Do nothing if it is not a member of previous components. 9790 MapperCGF.EmitBlock(TypeBB); 9791 llvm::PHINode *MemberMapType = 9792 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9793 MemberMapType->addIncoming(OriMapType, MemberBB); 9794 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9795 9796 // Combine the map type inherited from user-defined mapper with that 9797 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9798 // bits of the \a MapType, which is the input argument of the mapper 9799 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9800 // bits of MemberMapType. 
9801 // [OpenMP 5.0], 1.2.6. map-type decay. 9802 // | alloc | to | from | tofrom | release | delete 9803 // ---------------------------------------------------------- 9804 // alloc | alloc | alloc | alloc | alloc | release | delete 9805 // to | alloc | to | alloc | to | release | delete 9806 // from | alloc | alloc | from | from | release | delete 9807 // tofrom | alloc | to | from | tofrom | release | delete 9808 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9809 MapType, 9810 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9811 MappableExprsHandler::OMP_MAP_FROM)); 9812 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9813 llvm::BasicBlock *AllocElseBB = 9814 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9815 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9816 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9817 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9818 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9819 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9820 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9821 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9822 MapperCGF.EmitBlock(AllocBB); 9823 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9824 MemberMapType, 9825 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9826 MappableExprsHandler::OMP_MAP_FROM))); 9827 MapperCGF.Builder.CreateBr(EndBB); 9828 MapperCGF.EmitBlock(AllocElseBB); 9829 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9830 LeftToFrom, 9831 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9832 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9833 // In case of to, clear OMP_MAP_FROM. 
9834 MapperCGF.EmitBlock(ToBB); 9835 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9836 MemberMapType, 9837 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9838 MapperCGF.Builder.CreateBr(EndBB); 9839 MapperCGF.EmitBlock(ToElseBB); 9840 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9841 LeftToFrom, 9842 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9843 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9844 // In case of from, clear OMP_MAP_TO. 9845 MapperCGF.EmitBlock(FromBB); 9846 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9847 MemberMapType, 9848 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9849 // In case of tofrom, do nothing. 9850 MapperCGF.EmitBlock(EndBB); 9851 llvm::PHINode *CurMapType = 9852 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9853 CurMapType->addIncoming(AllocMapType, AllocBB); 9854 CurMapType->addIncoming(ToMapType, ToBB); 9855 CurMapType->addIncoming(FromMapType, FromBB); 9856 CurMapType->addIncoming(MemberMapType, ToElseBB); 9857 9858 // TODO: call the corresponding mapper function if a user-defined mapper is 9859 // associated with this map clause. 9860 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9861 // data structure. 9862 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9863 CurSizeArg, CurMapType}; 9864 MapperCGF.EmitRuntimeCall( 9865 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9866 OffloadingArgs); 9867 } 9868 9869 // Update the pointer to point to the next element that needs to be mapped, 9870 // and check whether we have mapped all elements. 
9871 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9872 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9873 PtrPHI->addIncoming(PtrNext, BodyBB); 9874 llvm::Value *IsDone = 9875 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9876 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9877 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9878 9879 MapperCGF.EmitBlock(ExitBB); 9880 // Emit array deletion if this is an array section and \p MapType indicates 9881 // that deletion is required. 9882 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9883 ElementSize, DoneBB, /*IsInit=*/false); 9884 9885 // Emit the function exit block. 9886 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9887 MapperCGF.FinishFunction(); 9888 UDMMap.try_emplace(D, Fn); 9889 if (CGF) { 9890 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9891 Decls.second.push_back(D); 9892 } 9893 } 9894 9895 /// Emit the array initialization or deletion portion for user-defined mapper 9896 /// code generation. First, it evaluates whether an array section is mapped and 9897 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9898 /// true, and \a MapType indicates to not delete this array, array 9899 /// initialization code is generated. If \a IsInit is false, and \a MapType 9900 /// indicates to not this array, array deletion code is generated. 9901 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9902 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9903 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9904 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9905 StringRef Prefix = IsInit ? ".init" : ".del"; 9906 9907 // Evaluate if this is an array section. 
9908 llvm::BasicBlock *IsDeleteBB = 9909 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); 9910 llvm::BasicBlock *BodyBB = 9911 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9912 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9913 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9914 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9915 9916 // Evaluate if we are going to delete this section. 9917 MapperCGF.EmitBlock(IsDeleteBB); 9918 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9919 MapType, 9920 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9921 llvm::Value *DeleteCond; 9922 if (IsInit) { 9923 DeleteCond = MapperCGF.Builder.CreateIsNull( 9924 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9925 } else { 9926 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9927 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9928 } 9929 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9930 9931 MapperCGF.EmitBlock(BodyBB); 9932 // Get the array size by multiplying element size and element number (i.e., \p 9933 // Size). 9934 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9935 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9936 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9937 // memory allocation/deletion purpose only. 9938 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9939 MapType, 9940 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9941 MappableExprsHandler::OMP_MAP_FROM))); 9942 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9943 // data structure. 
9944 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9945 MapperCGF.EmitRuntimeCall( 9946 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9947 } 9948 9949 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9950 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9951 llvm::Value *DeviceID, 9952 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9953 const OMPLoopDirective &D)> 9954 SizeEmitter) { 9955 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9956 const OMPExecutableDirective *TD = &D; 9957 // Get nested teams distribute kind directive, if any. 9958 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9959 TD = getNestedDistributeDirective(CGM.getContext(), D); 9960 if (!TD) 9961 return; 9962 const auto *LD = cast<OMPLoopDirective>(TD); 9963 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9964 PrePostActionTy &) { 9965 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9966 llvm::Value *Args[] = {DeviceID, NumIterations}; 9967 CGF.EmitRuntimeCall( 9968 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9969 } 9970 }; 9971 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9972 } 9973 9974 void CGOpenMPRuntime::emitTargetCall( 9975 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9976 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9977 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9978 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9979 const OMPLoopDirective &D)> 9980 SizeEmitter) { 9981 if (!CGF.HaveInsertPoint()) 9982 return; 9983 9984 assert(OutlinedFn && "Invalid outlined function!"); 9985 9986 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9987 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9988 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9989 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9990 
PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen below and read
  // by ThenGen, which captures both by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device expression was given: pass the OMP_DEVICEID_UNDEF constant.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value takes the "failed" branch.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: collect the map information of every capture, emit the
  // offloading arrays, publish them through InputInfo/MapTypesArray and then
  // run ThenGen (through EmitOMPTargetTaskBasedDirective when the directive
  // has a depend clause).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the fields of the captured record and the captured
    // values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    // The outputs alias Info's own fields: each array pointer is replaced by
    // the corresponding argument value.
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, through EmitOMPTargetTaskBasedDirective (with
  // a default-constructed OMPTargetDataInfo) when the directive has a depend
  // clause.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
10272 bool RequiresDeviceCodegen = 10273 isa<OMPExecutableDirective>(S) && 10274 isOpenMPTargetExecutionDirective( 10275 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10276 10277 if (RequiresDeviceCodegen) { 10278 const auto &E = *cast<OMPExecutableDirective>(S); 10279 unsigned DeviceID; 10280 unsigned FileID; 10281 unsigned Line; 10282 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 10283 FileID, Line); 10284 10285 // Is this a target region that should not be emitted as an entry point? If 10286 // so just signal we are done with this target region. 10287 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10288 ParentName, Line)) 10289 return; 10290 10291 switch (E.getDirectiveKind()) { 10292 case OMPD_target: 10293 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10294 cast<OMPTargetDirective>(E)); 10295 break; 10296 case OMPD_target_parallel: 10297 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10298 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10299 break; 10300 case OMPD_target_teams: 10301 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10302 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10303 break; 10304 case OMPD_target_teams_distribute: 10305 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10306 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10307 break; 10308 case OMPD_target_teams_distribute_simd: 10309 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10310 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10311 break; 10312 case OMPD_target_parallel_for: 10313 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10314 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10315 break; 10316 case OMPD_target_parallel_for_simd: 10317 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10318 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10319 break; 10320 case 
OMPD_target_simd: 10321 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10322 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10323 break; 10324 case OMPD_target_teams_distribute_parallel_for: 10325 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10326 CGM, ParentName, 10327 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10328 break; 10329 case OMPD_target_teams_distribute_parallel_for_simd: 10330 CodeGenFunction:: 10331 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10332 CGM, ParentName, 10333 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10334 break; 10335 case OMPD_parallel: 10336 case OMPD_for: 10337 case OMPD_parallel_for: 10338 case OMPD_parallel_master: 10339 case OMPD_parallel_sections: 10340 case OMPD_for_simd: 10341 case OMPD_parallel_for_simd: 10342 case OMPD_cancel: 10343 case OMPD_cancellation_point: 10344 case OMPD_ordered: 10345 case OMPD_threadprivate: 10346 case OMPD_allocate: 10347 case OMPD_task: 10348 case OMPD_simd: 10349 case OMPD_sections: 10350 case OMPD_section: 10351 case OMPD_single: 10352 case OMPD_master: 10353 case OMPD_critical: 10354 case OMPD_taskyield: 10355 case OMPD_barrier: 10356 case OMPD_taskwait: 10357 case OMPD_taskgroup: 10358 case OMPD_atomic: 10359 case OMPD_flush: 10360 case OMPD_depobj: 10361 case OMPD_scan: 10362 case OMPD_teams: 10363 case OMPD_target_data: 10364 case OMPD_target_exit_data: 10365 case OMPD_target_enter_data: 10366 case OMPD_distribute: 10367 case OMPD_distribute_simd: 10368 case OMPD_distribute_parallel_for: 10369 case OMPD_distribute_parallel_for_simd: 10370 case OMPD_teams_distribute: 10371 case OMPD_teams_distribute_simd: 10372 case OMPD_teams_distribute_parallel_for: 10373 case OMPD_teams_distribute_parallel_for_simd: 10374 case OMPD_target_update: 10375 case OMPD_declare_simd: 10376 case OMPD_declare_variant: 10377 case OMPD_begin_declare_variant: 10378 case OMPD_end_declare_variant: 10379 case OMPD_declare_target: 10380 case 
OMPD_end_declare_target: 10381 case OMPD_declare_reduction: 10382 case OMPD_declare_mapper: 10383 case OMPD_taskloop: 10384 case OMPD_taskloop_simd: 10385 case OMPD_master_taskloop: 10386 case OMPD_master_taskloop_simd: 10387 case OMPD_parallel_master_taskloop: 10388 case OMPD_parallel_master_taskloop_simd: 10389 case OMPD_requires: 10390 case OMPD_unknown: 10391 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10392 } 10393 return; 10394 } 10395 10396 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 10397 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10398 return; 10399 10400 scanForTargetRegionsFunctions( 10401 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 10402 return; 10403 } 10404 10405 // If this is a lambda function, look into its body. 10406 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10407 S = L->getBody(); 10408 10409 // Keep looking for target regions recursively. 10410 for (const Stmt *II : S->children()) 10411 scanForTargetRegionsFunctions(II, ParentName); 10412 } 10413 10414 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10415 // If emitting code for the host, we do not process FD here. Instead we do 10416 // the normal code generation. 10417 if (!CGM.getLangOpts().OpenMPIsDevice) { 10418 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 10419 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10420 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10421 // Do not emit device_type(nohost) functions for the host. 10422 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10423 return true; 10424 } 10425 return false; 10426 } 10427 10428 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10429 // Try to detect target regions in the function. 
10430 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10431 StringRef Name = CGM.getMangledName(GD); 10432 scanForTargetRegionsFunctions(FD->getBody(), Name); 10433 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10434 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10435 // Do not emit device_type(nohost) functions for the host. 10436 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10437 return true; 10438 } 10439 10440 // Do not to emit function if it is not marked as declare target. 10441 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10442 AlreadyEmittedTargetDecls.count(VD) == 0; 10443 } 10444 10445 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10446 if (!CGM.getLangOpts().OpenMPIsDevice) 10447 return false; 10448 10449 // Check if there are Ctors/Dtors in this declaration and look for target 10450 // regions in it. We use the complete variant to produce the kernel name 10451 // mangling. 10452 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10453 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10454 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10455 StringRef ParentName = 10456 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10457 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10458 } 10459 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10460 StringRef ParentName = 10461 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10462 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10463 } 10464 } 10465 10466 // Do not to emit variable if it is not marked as declare target. 
10467 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10468 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10469 cast<VarDecl>(GD.getDecl())); 10470 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10471 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10472 HasRequiresUnifiedSharedMemory)) { 10473 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10474 return true; 10475 } 10476 return false; 10477 } 10478 10479 llvm::Constant * 10480 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10481 const VarDecl *VD) { 10482 assert(VD->getType().isConstant(CGM.getContext()) && 10483 "Expected constant variable."); 10484 StringRef VarName; 10485 llvm::Constant *Addr; 10486 llvm::GlobalValue::LinkageTypes Linkage; 10487 QualType Ty = VD->getType(); 10488 SmallString<128> Buffer; 10489 { 10490 unsigned DeviceID; 10491 unsigned FileID; 10492 unsigned Line; 10493 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10494 FileID, Line); 10495 llvm::raw_svector_ostream OS(Buffer); 10496 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10497 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10498 VarName = OS.str(); 10499 } 10500 Linkage = llvm::GlobalValue::InternalLinkage; 10501 Addr = 10502 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10503 getDefaultFirstprivateAddressSpace()); 10504 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10505 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10506 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10507 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10508 VarName, Addr, VarSize, 10509 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10510 return Addr; 10511 } 10512 10513 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10514 llvm::Constant *Addr) { 10515 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10516 
!CGM.getLangOpts().OpenMPIsDevice) 10517 return; 10518 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10519 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10520 if (!Res) { 10521 if (CGM.getLangOpts().OpenMPIsDevice) { 10522 // Register non-target variables being emitted in device code (debug info 10523 // may cause this). 10524 StringRef VarName = CGM.getMangledName(VD); 10525 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10526 } 10527 return; 10528 } 10529 // Register declare target variables. 10530 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10531 StringRef VarName; 10532 CharUnits VarSize; 10533 llvm::GlobalValue::LinkageTypes Linkage; 10534 10535 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10536 !HasRequiresUnifiedSharedMemory) { 10537 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10538 VarName = CGM.getMangledName(VD); 10539 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10540 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10541 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10542 } else { 10543 VarSize = CharUnits::Zero(); 10544 } 10545 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10546 // Temp solution to prevent optimizations of the internal variables. 
10547 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10548 std::string RefName = getName({VarName, "ref"}); 10549 if (!CGM.GetGlobalValue(RefName)) { 10550 llvm::Constant *AddrRef = 10551 getOrCreateInternalVariable(Addr->getType(), RefName); 10552 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10553 GVAddrRef->setConstant(/*Val=*/true); 10554 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10555 GVAddrRef->setInitializer(Addr); 10556 CGM.addCompilerUsedGlobal(GVAddrRef); 10557 } 10558 } 10559 } else { 10560 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10561 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10562 HasRequiresUnifiedSharedMemory)) && 10563 "Declare target attribute must link or to with unified memory."); 10564 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10565 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10566 else 10567 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10568 10569 if (CGM.getLangOpts().OpenMPIsDevice) { 10570 VarName = Addr->getName(); 10571 Addr = nullptr; 10572 } else { 10573 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10574 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10575 } 10576 VarSize = CGM.getPointerSize(); 10577 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10578 } 10579 10580 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10581 VarName, Addr, VarSize, Flags, Linkage); 10582 } 10583 10584 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10585 if (isa<FunctionDecl>(GD.getDecl()) || 10586 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10587 return emitTargetFunctions(GD); 10588 10589 return emitTargetGlobalVariable(GD); 10590 } 10591 10592 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10593 for (const VarDecl *VD : DeferredGlobalVariables) { 10594 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10595 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10596 if (!Res) 
10597 continue; 10598 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10599 !HasRequiresUnifiedSharedMemory) { 10600 CGM.EmitGlobal(VD); 10601 } else { 10602 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10603 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10604 HasRequiresUnifiedSharedMemory)) && 10605 "Expected link clause or to clause with unified memory."); 10606 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10607 } 10608 } 10609 } 10610 10611 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10612 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10613 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10614 " Expected target-based directive."); 10615 } 10616 10617 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10618 for (const OMPClause *Clause : D->clauselists()) { 10619 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10620 HasRequiresUnifiedSharedMemory = true; 10621 } else if (const auto *AC = 10622 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10623 switch (AC->getAtomicDefaultMemOrderKind()) { 10624 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10625 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10626 break; 10627 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10628 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10629 break; 10630 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10631 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10632 break; 10633 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10634 break; 10635 } 10636 } 10637 } 10638 } 10639 10640 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10641 return RequiresAtomicOrdering; 10642 } 10643 10644 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10645 LangAS &AS) { 10646 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10647 return false; 10648 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10649 
switch(A->getAllocatorType()) { 10650 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10651 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10652 // Not supported, fallback to the default mem space. 10653 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10654 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10655 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10656 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10657 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10658 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10659 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10660 AS = LangAS::Default; 10661 return true; 10662 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10663 llvm_unreachable("Expected predefined allocator for the variables with the " 10664 "static storage."); 10665 } 10666 return false; 10667 } 10668 10669 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10670 return HasRequiresUnifiedSharedMemory; 10671 } 10672 10673 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10674 CodeGenModule &CGM) 10675 : CGM(CGM) { 10676 if (CGM.getLangOpts().OpenMPIsDevice) { 10677 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10678 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10679 } 10680 } 10681 10682 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10683 if (CGM.getLangOpts().OpenMPIsDevice) 10684 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10685 } 10686 10687 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10688 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10689 return true; 10690 10691 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10692 // Do not to emit function if it is marked as declare target as it was already 10693 // emitted. 
10694 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10695 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10696 if (auto *F = dyn_cast_or_null<llvm::Function>( 10697 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10698 return !F->isDeclaration(); 10699 return false; 10700 } 10701 return true; 10702 } 10703 10704 return !AlreadyEmittedTargetDecls.insert(D).second; 10705 } 10706 10707 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10708 // If we don't have entries or if we are emitting code for the device, we 10709 // don't need to do anything. 10710 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10711 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10712 (OffloadEntriesInfoManager.empty() && 10713 !HasEmittedDeclareTargetRegion && 10714 !HasEmittedTargetRegion)) 10715 return nullptr; 10716 10717 // Create and register the function that handles the requires directives. 10718 ASTContext &C = CGM.getContext(); 10719 10720 llvm::Function *RequiresRegFn; 10721 { 10722 CodeGenFunction CGF(CGM); 10723 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10724 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10725 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10726 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 10727 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10728 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10729 // TODO: check for other requires clauses. 10730 // The requires directive takes effect only when a target region is 10731 // present in the compilation unit. Otherwise it is ignored and not 10732 // passed to the runtime. This avoids the runtime from throwing an error 10733 // for mismatching requires clauses across compilation units that don't 10734 // contain at least 1 target region. 
10735 assert((HasEmittedTargetRegion || 10736 HasEmittedDeclareTargetRegion || 10737 !OffloadEntriesInfoManager.empty()) && 10738 "Target or declare target region expected."); 10739 if (HasRequiresUnifiedSharedMemory) 10740 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10741 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 10742 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10743 CGF.FinishFunction(); 10744 } 10745 return RequiresRegFn; 10746 } 10747 10748 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10749 const OMPExecutableDirective &D, 10750 SourceLocation Loc, 10751 llvm::Function *OutlinedFn, 10752 ArrayRef<llvm::Value *> CapturedVars) { 10753 if (!CGF.HaveInsertPoint()) 10754 return; 10755 10756 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10757 CodeGenFunction::RunCleanupsScope Scope(CGF); 10758 10759 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10760 llvm::Value *Args[] = { 10761 RTLoc, 10762 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10763 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10764 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10765 RealArgs.append(std::begin(Args), std::end(Args)); 10766 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10767 10768 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 10769 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10770 } 10771 10772 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10773 const Expr *NumTeams, 10774 const Expr *ThreadLimit, 10775 SourceLocation Loc) { 10776 if (!CGF.HaveInsertPoint()) 10777 return; 10778 10779 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10780 10781 llvm::Value *NumTeamsVal = 10782 NumTeams 10783 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10784 CGF.CGM.Int32Ty, /* isSigned = */ true) 10785 : CGF.Builder.getInt32(0); 10786 10787 llvm::Value *ThreadLimitVal = 10788 ThreadLimit 10789 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10790 CGF.CGM.Int32Ty, /* isSigned = */ true) 10791 : CGF.Builder.getInt32(0); 10792 10793 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10794 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10795 ThreadLimitVal}; 10796 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 10797 PushNumTeamsArgs); 10798 } 10799 10800 void CGOpenMPRuntime::emitTargetDataCalls( 10801 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10802 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10803 if (!CGF.HaveInsertPoint()) 10804 return; 10805 10806 // Action used to replace the default codegen action and turn privatization 10807 // off. 10808 PrePostActionTy NoPrivAction; 10809 10810 // Generate the code for the opening of the data environment. Capture all the 10811 // arguments of the runtime call by reference because they are used in the 10812 // closing of the region. 10813 auto &&BeginThenGen = [this, &D, Device, &Info, 10814 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10815 // Fill up the arrays with all the mapped variables. 10816 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10817 MappableExprsHandler::MapValuesArrayTy Pointers; 10818 MappableExprsHandler::MapValuesArrayTy Sizes; 10819 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10820 10821 // Get map clause information. 10822 MappableExprsHandler MCHandler(D, CGF); 10823 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10824 10825 // Fill up the arrays and create the arguments. 
10826 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10827 10828 llvm::Value *BasePointersArrayArg = nullptr; 10829 llvm::Value *PointersArrayArg = nullptr; 10830 llvm::Value *SizesArrayArg = nullptr; 10831 llvm::Value *MapTypesArrayArg = nullptr; 10832 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10833 SizesArrayArg, MapTypesArrayArg, Info); 10834 10835 // Emit device ID if any. 10836 llvm::Value *DeviceID = nullptr; 10837 if (Device) { 10838 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10839 CGF.Int64Ty, /*isSigned=*/true); 10840 } else { 10841 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10842 } 10843 10844 // Emit the number of elements in the offloading arrays. 10845 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10846 10847 llvm::Value *OffloadingArgs[] = { 10848 DeviceID, PointerNum, BasePointersArrayArg, 10849 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10850 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10851 OffloadingArgs); 10852 10853 // If device pointer privatization is required, emit the body of the region 10854 // here. It will have to be duplicated: with and without privatization. 10855 if (!Info.CaptureDeviceAddrMap.empty()) 10856 CodeGen(CGF); 10857 }; 10858 10859 // Generate code for the closing of the data region. 10860 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10861 PrePostActionTy &) { 10862 assert(Info.isValid() && "Invalid data environment closing arguments."); 10863 10864 llvm::Value *BasePointersArrayArg = nullptr; 10865 llvm::Value *PointersArrayArg = nullptr; 10866 llvm::Value *SizesArrayArg = nullptr; 10867 llvm::Value *MapTypesArrayArg = nullptr; 10868 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10869 SizesArrayArg, MapTypesArrayArg, Info); 10870 10871 // Emit device ID if any. 
10872 llvm::Value *DeviceID = nullptr; 10873 if (Device) { 10874 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10875 CGF.Int64Ty, /*isSigned=*/true); 10876 } else { 10877 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10878 } 10879 10880 // Emit the number of elements in the offloading arrays. 10881 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10882 10883 llvm::Value *OffloadingArgs[] = { 10884 DeviceID, PointerNum, BasePointersArrayArg, 10885 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10886 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10887 OffloadingArgs); 10888 }; 10889 10890 // If we need device pointer privatization, we need to emit the body of the 10891 // region with no privatization in the 'else' branch of the conditional. 10892 // Otherwise, we don't have to do anything. 10893 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10894 PrePostActionTy &) { 10895 if (!Info.CaptureDeviceAddrMap.empty()) { 10896 CodeGen.setAction(NoPrivAction); 10897 CodeGen(CGF); 10898 } 10899 }; 10900 10901 // We don't have to do anything to close the region if the if clause evaluates 10902 // to false. 10903 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10904 10905 if (IfCond) { 10906 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10907 } else { 10908 RegionCodeGenTy RCG(BeginThenGen); 10909 RCG(CGF); 10910 } 10911 10912 // If we don't require privatization of device pointers, we emit the body in 10913 // between the runtime calls. This avoids duplicating the body code. 
10914 if (Info.CaptureDeviceAddrMap.empty()) { 10915 CodeGen.setAction(NoPrivAction); 10916 CodeGen(CGF); 10917 } 10918 10919 if (IfCond) { 10920 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10921 } else { 10922 RegionCodeGenTy RCG(EndThenGen); 10923 RCG(CGF); 10924 } 10925 } 10926 10927 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10928 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10929 const Expr *Device) { 10930 if (!CGF.HaveInsertPoint()) 10931 return; 10932 10933 assert((isa<OMPTargetEnterDataDirective>(D) || 10934 isa<OMPTargetExitDataDirective>(D) || 10935 isa<OMPTargetUpdateDirective>(D)) && 10936 "Expecting either target enter, exit data, or update directives."); 10937 10938 CodeGenFunction::OMPTargetDataInfo InputInfo; 10939 llvm::Value *MapTypesArray = nullptr; 10940 // Generate the code for the opening of the data environment. 10941 auto &&ThenGen = [this, &D, Device, &InputInfo, 10942 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10943 // Emit device ID if any. 10944 llvm::Value *DeviceID = nullptr; 10945 if (Device) { 10946 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10947 CGF.Int64Ty, /*isSigned=*/true); 10948 } else { 10949 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10950 } 10951 10952 // Emit the number of elements in the offloading arrays. 10953 llvm::Constant *PointerNum = 10954 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10955 10956 llvm::Value *OffloadingArgs[] = {DeviceID, 10957 PointerNum, 10958 InputInfo.BasePointersArray.getPointer(), 10959 InputInfo.PointersArray.getPointer(), 10960 InputInfo.SizesArray.getPointer(), 10961 MapTypesArray}; 10962 10963 // Select the right runtime function call for each expected standalone 10964 // directive. 10965 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10966 OpenMPRTLFunction RTLFn; 10967 switch (D.getDirectiveKind()) { 10968 case OMPD_target_enter_data: 10969 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_begin_nowait 10970 : OMPRTL__tgt_target_data_begin; 10971 break; 10972 case OMPD_target_exit_data: 10973 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10974 : OMPRTL__tgt_target_data_end; 10975 break; 10976 case OMPD_target_update: 10977 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10978 : OMPRTL__tgt_target_data_update; 10979 break; 10980 case OMPD_parallel: 10981 case OMPD_for: 10982 case OMPD_parallel_for: 10983 case OMPD_parallel_master: 10984 case OMPD_parallel_sections: 10985 case OMPD_for_simd: 10986 case OMPD_parallel_for_simd: 10987 case OMPD_cancel: 10988 case OMPD_cancellation_point: 10989 case OMPD_ordered: 10990 case OMPD_threadprivate: 10991 case OMPD_allocate: 10992 case OMPD_task: 10993 case OMPD_simd: 10994 case OMPD_sections: 10995 case OMPD_section: 10996 case OMPD_single: 10997 case OMPD_master: 10998 case OMPD_critical: 10999 case OMPD_taskyield: 11000 case OMPD_barrier: 11001 case OMPD_taskwait: 11002 case OMPD_taskgroup: 11003 case OMPD_atomic: 11004 case OMPD_flush: 11005 case OMPD_depobj: 11006 case OMPD_scan: 11007 case OMPD_teams: 11008 case OMPD_target_data: 11009 case OMPD_distribute: 11010 case OMPD_distribute_simd: 11011 case OMPD_distribute_parallel_for: 11012 case OMPD_distribute_parallel_for_simd: 11013 case OMPD_teams_distribute: 11014 case OMPD_teams_distribute_simd: 11015 case OMPD_teams_distribute_parallel_for: 11016 case OMPD_teams_distribute_parallel_for_simd: 11017 case OMPD_declare_simd: 11018 case OMPD_declare_variant: 11019 case OMPD_begin_declare_variant: 11020 case OMPD_end_declare_variant: 11021 case OMPD_declare_target: 11022 case OMPD_end_declare_target: 11023 case OMPD_declare_reduction: 11024 case OMPD_declare_mapper: 11025 case OMPD_taskloop: 11026 case OMPD_taskloop_simd: 11027 case OMPD_master_taskloop: 11028 case OMPD_master_taskloop_simd: 11029 case OMPD_parallel_master_taskloop: 11030 case OMPD_parallel_master_taskloop_simd: 11031 case OMPD_target: 11032 case 
OMPD_target_simd: 11033 case OMPD_target_teams_distribute: 11034 case OMPD_target_teams_distribute_simd: 11035 case OMPD_target_teams_distribute_parallel_for: 11036 case OMPD_target_teams_distribute_parallel_for_simd: 11037 case OMPD_target_teams: 11038 case OMPD_target_parallel: 11039 case OMPD_target_parallel_for: 11040 case OMPD_target_parallel_for_simd: 11041 case OMPD_requires: 11042 case OMPD_unknown: 11043 llvm_unreachable("Unexpected standalone target data directive."); 11044 break; 11045 } 11046 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 11047 }; 11048 11049 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 11050 CodeGenFunction &CGF, PrePostActionTy &) { 11051 // Fill up the arrays with all the mapped variables. 11052 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 11053 MappableExprsHandler::MapValuesArrayTy Pointers; 11054 MappableExprsHandler::MapValuesArrayTy Sizes; 11055 MappableExprsHandler::MapFlagsArrayTy MapTypes; 11056 11057 // Get map clause information. 11058 MappableExprsHandler MEHandler(D, CGF); 11059 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 11060 11061 TargetDataInfo Info; 11062 // Fill up the arrays and create the arguments. 
11063 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 11064 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 11065 Info.PointersArray, Info.SizesArray, 11066 Info.MapTypesArray, Info); 11067 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11068 InputInfo.BasePointersArray = 11069 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11070 InputInfo.PointersArray = 11071 Address(Info.PointersArray, CGM.getPointerAlign()); 11072 InputInfo.SizesArray = 11073 Address(Info.SizesArray, CGM.getPointerAlign()); 11074 MapTypesArray = Info.MapTypesArray; 11075 if (D.hasClausesOfKind<OMPDependClause>()) 11076 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11077 else 11078 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11079 }; 11080 11081 if (IfCond) { 11082 emitIfClause(CGF, IfCond, TargetThenGen, 11083 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11084 } else { 11085 RegionCodeGenTy ThenRCG(TargetThenGen); 11086 ThenRCG(CGF); 11087 } 11088 } 11089 11090 namespace { 11091 /// Kind of parameter in a function with 'declare simd' directive. 11092 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11093 /// Attribute set of the parameter. 11094 struct ParamAttrTy { 11095 ParamKindTy Kind = Vector; 11096 llvm::APSInt StrideOrArg; 11097 llvm::APSInt Alignment; 11098 }; 11099 } // namespace 11100 11101 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11102 ArrayRef<ParamAttrTy> ParamAttrs) { 11103 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11104 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11105 // of that clause. The VLEN value must be power of 2. 11106 // In other case the notion of the function`s "characteristic data type" (CDT) 11107 // is used to compute the vector length. 11108 // CDT is defined in the following order: 11109 // a) For non-void function, the CDT is the return type. 
11110 // b) If the function has any non-uniform, non-linear parameters, then the 11111 // CDT is the type of the first such parameter. 11112 // c) If the CDT determined by a) or b) above is struct, union, or class 11113 // type which is pass-by-value (except for the type that maps to the 11114 // built-in complex data type), the characteristic data type is int. 11115 // d) If none of the above three cases is applicable, the CDT is int. 11116 // The VLEN is then determined based on the CDT and the size of vector 11117 // register of that ISA for which current vector version is generated. The 11118 // VLEN is computed using the formula below: 11119 // VLEN = sizeof(vector_register) / sizeof(CDT), 11120 // where vector register size specified in section 3.2.1 Registers and the 11121 // Stack Frame of original AMD64 ABI document. 11122 QualType RetType = FD->getReturnType(); 11123 if (RetType.isNull()) 11124 return 0; 11125 ASTContext &C = FD->getASTContext(); 11126 QualType CDT; 11127 if (!RetType.isNull() && !RetType->isVoidType()) { 11128 CDT = RetType; 11129 } else { 11130 unsigned Offset = 0; 11131 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 11132 if (ParamAttrs[Offset].Kind == Vector) 11133 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 11134 ++Offset; 11135 } 11136 if (CDT.isNull()) { 11137 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11138 if (ParamAttrs[I + Offset].Kind == Vector) { 11139 CDT = FD->getParamDecl(I)->getType(); 11140 break; 11141 } 11142 } 11143 } 11144 } 11145 if (CDT.isNull()) 11146 CDT = C.IntTy; 11147 CDT = CDT->getCanonicalTypeUnqualified(); 11148 if (CDT->isRecordType() || CDT->isUnionType()) 11149 CDT = C.IntTy; 11150 return C.getTypeSize(CDT); 11151 } 11152 11153 static void 11154 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 11155 const llvm::APSInt &VLENVal, 11156 ArrayRef<ParamAttrTy> ParamAttrs, 11157 OMPDeclareSimdDeclAttr::BranchStateTy State) { 11158 struct ISADataTy { 
11159 char ISA; 11160 unsigned VecRegSize; 11161 }; 11162 ISADataTy ISAData[] = { 11163 { 11164 'b', 128 11165 }, // SSE 11166 { 11167 'c', 256 11168 }, // AVX 11169 { 11170 'd', 256 11171 }, // AVX2 11172 { 11173 'e', 512 11174 }, // AVX512 11175 }; 11176 llvm::SmallVector<char, 2> Masked; 11177 switch (State) { 11178 case OMPDeclareSimdDeclAttr::BS_Undefined: 11179 Masked.push_back('N'); 11180 Masked.push_back('M'); 11181 break; 11182 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11183 Masked.push_back('N'); 11184 break; 11185 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11186 Masked.push_back('M'); 11187 break; 11188 } 11189 for (char Mask : Masked) { 11190 for (const ISADataTy &Data : ISAData) { 11191 SmallString<256> Buffer; 11192 llvm::raw_svector_ostream Out(Buffer); 11193 Out << "_ZGV" << Data.ISA << Mask; 11194 if (!VLENVal) { 11195 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11196 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11197 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11198 } else { 11199 Out << VLENVal; 11200 } 11201 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11202 switch (ParamAttr.Kind){ 11203 case LinearWithVarStride: 11204 Out << 's' << ParamAttr.StrideOrArg; 11205 break; 11206 case Linear: 11207 Out << 'l'; 11208 if (ParamAttr.StrideOrArg != 1) 11209 Out << ParamAttr.StrideOrArg; 11210 break; 11211 case Uniform: 11212 Out << 'u'; 11213 break; 11214 case Vector: 11215 Out << 'v'; 11216 break; 11217 } 11218 if (!!ParamAttr.Alignment) 11219 Out << 'a' << ParamAttr.Alignment; 11220 } 11221 Out << '_' << Fn->getName(); 11222 Fn->addFnAttr(Out.str()); 11223 } 11224 } 11225 } 11226 11227 // This are the Functions that are needed to mangle the name of the 11228 // vector functions generated by the compiler, according to the rules 11229 // defined in the "Vector Function ABI specifications for AArch64", 11230 // available at 11231 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11232 11233 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11234 /// 11235 /// TODO: Need to implement the behavior for reference marked with a 11236 /// var or no linear modifiers (1.b in the section). For this, we 11237 /// need to extend ParamKindTy to support the linear modifiers. 11238 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11239 QT = QT.getCanonicalType(); 11240 11241 if (QT->isVoidType()) 11242 return false; 11243 11244 if (Kind == ParamKindTy::Uniform) 11245 return false; 11246 11247 if (Kind == ParamKindTy::Linear) 11248 return false; 11249 11250 // TODO: Handle linear references with modifiers 11251 11252 if (Kind == ParamKindTy::LinearWithVarStride) 11253 return false; 11254 11255 return true; 11256 } 11257 11258 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11259 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11260 QT = QT.getCanonicalType(); 11261 unsigned Size = C.getTypeSize(QT); 11262 11263 // Only scalars and complex within 16 bytes wide set PVB to true. 11264 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11265 return false; 11266 11267 if (QT->isFloatingType()) 11268 return true; 11269 11270 if (QT->isIntegerType()) 11271 return true; 11272 11273 if (QT->isPointerType()) 11274 return true; 11275 11276 // TODO: Add support for complex types (section 3.1.2, item 2). 11277 11278 return false; 11279 } 11280 11281 /// Computes the lane size (LS) of a return type or of an input parameter, 11282 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11283 /// TODO: Add support for references, section 3.2.1, item 1. 
11284 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11285 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11286 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11287 if (getAArch64PBV(PTy, C)) 11288 return C.getTypeSize(PTy); 11289 } 11290 if (getAArch64PBV(QT, C)) 11291 return C.getTypeSize(QT); 11292 11293 return C.getTypeSize(C.getUIntPtrType()); 11294 } 11295 11296 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11297 // signature of the scalar function, as defined in 3.2.2 of the 11298 // AAVFABI. 11299 static std::tuple<unsigned, unsigned, bool> 11300 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11301 QualType RetType = FD->getReturnType().getCanonicalType(); 11302 11303 ASTContext &C = FD->getASTContext(); 11304 11305 bool OutputBecomesInput = false; 11306 11307 llvm::SmallVector<unsigned, 8> Sizes; 11308 if (!RetType->isVoidType()) { 11309 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11310 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11311 OutputBecomesInput = true; 11312 } 11313 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11314 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11315 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11316 } 11317 11318 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11319 // The LS of a function parameter / return value can only be a power 11320 // of 2, starting from 8 bits, up to 128. 
11321 assert(std::all_of(Sizes.begin(), Sizes.end(), 11322 [](unsigned Size) { 11323 return Size == 8 || Size == 16 || Size == 32 || 11324 Size == 64 || Size == 128; 11325 }) && 11326 "Invalid size"); 11327 11328 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11329 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11330 OutputBecomesInput); 11331 } 11332 11333 /// Mangle the parameter part of the vector function name according to 11334 /// their OpenMP classification. The mangling function is defined in 11335 /// section 3.5 of the AAVFABI. 11336 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11337 SmallString<256> Buffer; 11338 llvm::raw_svector_ostream Out(Buffer); 11339 for (const auto &ParamAttr : ParamAttrs) { 11340 switch (ParamAttr.Kind) { 11341 case LinearWithVarStride: 11342 Out << "ls" << ParamAttr.StrideOrArg; 11343 break; 11344 case Linear: 11345 Out << 'l'; 11346 // Don't print the step value if it is not present or if it is 11347 // equal to 1. 11348 if (ParamAttr.StrideOrArg != 1) 11349 Out << ParamAttr.StrideOrArg; 11350 break; 11351 case Uniform: 11352 Out << 'u'; 11353 break; 11354 case Vector: 11355 Out << 'v'; 11356 break; 11357 } 11358 11359 if (!!ParamAttr.Alignment) 11360 Out << 'a' << ParamAttr.Alignment; 11361 } 11362 11363 return std::string(Out.str()); 11364 } 11365 11366 // Function used to add the attribute. The parameter `VLEN` is 11367 // templated to allow the use of "x" when targeting scalable functions 11368 // for SVE. 
11369 template <typename T> 11370 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11371 char ISA, StringRef ParSeq, 11372 StringRef MangledName, bool OutputBecomesInput, 11373 llvm::Function *Fn) { 11374 SmallString<256> Buffer; 11375 llvm::raw_svector_ostream Out(Buffer); 11376 Out << Prefix << ISA << LMask << VLEN; 11377 if (OutputBecomesInput) 11378 Out << "v"; 11379 Out << ParSeq << "_" << MangledName; 11380 Fn->addFnAttr(Out.str()); 11381 } 11382 11383 // Helper function to generate the Advanced SIMD names depending on 11384 // the value of the NDS when simdlen is not present. 11385 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11386 StringRef Prefix, char ISA, 11387 StringRef ParSeq, StringRef MangledName, 11388 bool OutputBecomesInput, 11389 llvm::Function *Fn) { 11390 switch (NDS) { 11391 case 8: 11392 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11393 OutputBecomesInput, Fn); 11394 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11395 OutputBecomesInput, Fn); 11396 break; 11397 case 16: 11398 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11399 OutputBecomesInput, Fn); 11400 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11401 OutputBecomesInput, Fn); 11402 break; 11403 case 32: 11404 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11405 OutputBecomesInput, Fn); 11406 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11407 OutputBecomesInput, Fn); 11408 break; 11409 case 64: 11410 case 128: 11411 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11412 OutputBecomesInput, Fn); 11413 break; 11414 default: 11415 llvm_unreachable("Scalar type is too wide."); 11416 } 11417 } 11418 11419 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
11420 static void emitAArch64DeclareSimdFunction( 11421 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11422 ArrayRef<ParamAttrTy> ParamAttrs, 11423 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11424 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11425 11426 // Get basic data for building the vector signature. 11427 const auto Data = getNDSWDS(FD, ParamAttrs); 11428 const unsigned NDS = std::get<0>(Data); 11429 const unsigned WDS = std::get<1>(Data); 11430 const bool OutputBecomesInput = std::get<2>(Data); 11431 11432 // Check the values provided via `simdlen` by the user. 11433 // 1. A `simdlen(1)` doesn't produce vector signatures, 11434 if (UserVLEN == 1) { 11435 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11436 DiagnosticsEngine::Warning, 11437 "The clause simdlen(1) has no effect when targeting aarch64."); 11438 CGM.getDiags().Report(SLoc, DiagID); 11439 return; 11440 } 11441 11442 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11443 // Advanced SIMD output. 11444 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11445 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11446 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11447 "power of 2 when targeting Advanced SIMD."); 11448 CGM.getDiags().Report(SLoc, DiagID); 11449 return; 11450 } 11451 11452 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11453 // limits. 11454 if (ISA == 's' && UserVLEN != 0) { 11455 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11456 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11457 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11458 "lanes in the architectural constraints " 11459 "for SVE (min is 128-bit, max is " 11460 "2048-bit, by steps of 128-bit)"); 11461 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11462 return; 11463 } 11464 } 11465 11466 // Sort out parameter sequence. 
11467 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11468 StringRef Prefix = "_ZGV"; 11469 // Generate simdlen from user input (if any). 11470 if (UserVLEN) { 11471 if (ISA == 's') { 11472 // SVE generates only a masked function. 11473 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11474 OutputBecomesInput, Fn); 11475 } else { 11476 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11477 // Advanced SIMD generates one or two functions, depending on 11478 // the `[not]inbranch` clause. 11479 switch (State) { 11480 case OMPDeclareSimdDeclAttr::BS_Undefined: 11481 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11482 OutputBecomesInput, Fn); 11483 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11484 OutputBecomesInput, Fn); 11485 break; 11486 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11487 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11488 OutputBecomesInput, Fn); 11489 break; 11490 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11491 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11492 OutputBecomesInput, Fn); 11493 break; 11494 } 11495 } 11496 } else { 11497 // If no user simdlen is provided, follow the AAVFABI rules for 11498 // generating the vector length. 11499 if (ISA == 's') { 11500 // SVE, section 3.4.1, item 1. 11501 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11502 OutputBecomesInput, Fn); 11503 } else { 11504 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11505 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11506 // two vector names depending on the use of the clause 11507 // `[not]inbranch`. 
11508 switch (State) { 11509 case OMPDeclareSimdDeclAttr::BS_Undefined: 11510 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11511 OutputBecomesInput, Fn); 11512 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11513 OutputBecomesInput, Fn); 11514 break; 11515 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11516 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11517 OutputBecomesInput, Fn); 11518 break; 11519 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11520 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11521 OutputBecomesInput, Fn); 11522 break; 11523 } 11524 } 11525 } 11526 } 11527 11528 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11529 llvm::Function *Fn) { 11530 ASTContext &C = CGM.getContext(); 11531 FD = FD->getMostRecentDecl(); 11532 // Map params to their positions in function decl. 11533 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11534 if (isa<CXXMethodDecl>(FD)) 11535 ParamPositions.try_emplace(FD, 0); 11536 unsigned ParamPos = ParamPositions.size(); 11537 for (const ParmVarDecl *P : FD->parameters()) { 11538 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11539 ++ParamPos; 11540 } 11541 while (FD) { 11542 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11543 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11544 // Mark uniform parameters. 11545 for (const Expr *E : Attr->uniforms()) { 11546 E = E->IgnoreParenImpCasts(); 11547 unsigned Pos; 11548 if (isa<CXXThisExpr>(E)) { 11549 Pos = ParamPositions[FD]; 11550 } else { 11551 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11552 ->getCanonicalDecl(); 11553 Pos = ParamPositions[PVD]; 11554 } 11555 ParamAttrs[Pos].Kind = Uniform; 11556 } 11557 // Get alignment info. 
11558 auto NI = Attr->alignments_begin(); 11559 for (const Expr *E : Attr->aligneds()) { 11560 E = E->IgnoreParenImpCasts(); 11561 unsigned Pos; 11562 QualType ParmTy; 11563 if (isa<CXXThisExpr>(E)) { 11564 Pos = ParamPositions[FD]; 11565 ParmTy = E->getType(); 11566 } else { 11567 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11568 ->getCanonicalDecl(); 11569 Pos = ParamPositions[PVD]; 11570 ParmTy = PVD->getType(); 11571 } 11572 ParamAttrs[Pos].Alignment = 11573 (*NI) 11574 ? (*NI)->EvaluateKnownConstInt(C) 11575 : llvm::APSInt::getUnsigned( 11576 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11577 .getQuantity()); 11578 ++NI; 11579 } 11580 // Mark linear parameters. 11581 auto SI = Attr->steps_begin(); 11582 auto MI = Attr->modifiers_begin(); 11583 for (const Expr *E : Attr->linears()) { 11584 E = E->IgnoreParenImpCasts(); 11585 unsigned Pos; 11586 // Rescaling factor needed to compute the linear parameter 11587 // value in the mangled name. 11588 unsigned PtrRescalingFactor = 1; 11589 if (isa<CXXThisExpr>(E)) { 11590 Pos = ParamPositions[FD]; 11591 } else { 11592 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11593 ->getCanonicalDecl(); 11594 Pos = ParamPositions[PVD]; 11595 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11596 PtrRescalingFactor = CGM.getContext() 11597 .getTypeSizeInChars(P->getPointeeType()) 11598 .getQuantity(); 11599 } 11600 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11601 ParamAttr.Kind = Linear; 11602 // Assuming a stride of 1, for `linear` without modifiers. 
11603 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11604 if (*SI) { 11605 Expr::EvalResult Result; 11606 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11607 if (const auto *DRE = 11608 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11609 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11610 ParamAttr.Kind = LinearWithVarStride; 11611 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11612 ParamPositions[StridePVD->getCanonicalDecl()]); 11613 } 11614 } 11615 } else { 11616 ParamAttr.StrideOrArg = Result.Val.getInt(); 11617 } 11618 } 11619 // If we are using a linear clause on a pointer, we need to 11620 // rescale the value of linear_step with the byte size of the 11621 // pointee type. 11622 if (Linear == ParamAttr.Kind) 11623 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11624 ++SI; 11625 ++MI; 11626 } 11627 llvm::APSInt VLENVal; 11628 SourceLocation ExprLoc; 11629 const Expr *VLENExpr = Attr->getSimdlen(); 11630 if (VLENExpr) { 11631 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11632 ExprLoc = VLENExpr->getExprLoc(); 11633 } 11634 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11635 if (CGM.getTriple().isX86()) { 11636 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11637 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11638 unsigned VLEN = VLENVal.getExtValue(); 11639 StringRef MangledName = Fn->getName(); 11640 if (CGM.getTarget().hasFeature("sve")) 11641 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11642 MangledName, 's', 128, Fn, ExprLoc); 11643 if (CGM.getTarget().hasFeature("neon")) 11644 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11645 MangledName, 'n', 128, Fn, ExprLoc); 11646 } 11647 } 11648 FD = FD->getPreviousDecl(); 11649 } 11650 } 11651 11652 namespace { 11653 /// Cleanup action for doacross support. 
11654 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11655 public: 11656 static const int DoacrossFinArgs = 2; 11657 11658 private: 11659 llvm::FunctionCallee RTLFn; 11660 llvm::Value *Args[DoacrossFinArgs]; 11661 11662 public: 11663 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11664 ArrayRef<llvm::Value *> CallArgs) 11665 : RTLFn(RTLFn) { 11666 assert(CallArgs.size() == DoacrossFinArgs); 11667 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11668 } 11669 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11670 if (!CGF.HaveInsertPoint()) 11671 return; 11672 CGF.EmitRuntimeCall(RTLFn, Args); 11673 } 11674 }; 11675 } // namespace 11676 11677 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11678 const OMPLoopDirective &D, 11679 ArrayRef<Expr *> NumIterations) { 11680 if (!CGF.HaveInsertPoint()) 11681 return; 11682 11683 ASTContext &C = CGM.getContext(); 11684 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11685 RecordDecl *RD; 11686 if (KmpDimTy.isNull()) { 11687 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11688 // kmp_int64 lo; // lower 11689 // kmp_int64 up; // upper 11690 // kmp_int64 st; // stride 11691 // }; 11692 RD = C.buildImplicitRecord("kmp_dim"); 11693 RD->startDefinition(); 11694 addFieldToRecordDecl(C, RD, Int64Ty); 11695 addFieldToRecordDecl(C, RD, Int64Ty); 11696 addFieldToRecordDecl(C, RD, Int64Ty); 11697 RD->completeDefinition(); 11698 KmpDimTy = C.getRecordType(RD); 11699 } else { 11700 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11701 } 11702 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11703 QualType ArrayTy = 11704 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11705 11706 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11707 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11708 enum { LowerFD = 0, UpperFD, StrideFD }; 11709 // Fill dims with data. 
11710 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11711 LValue DimsLVal = CGF.MakeAddrLValue( 11712 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11713 // dims.upper = num_iterations; 11714 LValue UpperLVal = CGF.EmitLValueForField( 11715 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11716 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11717 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11718 Int64Ty, NumIterations[I]->getExprLoc()); 11719 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11720 // dims.stride = 1; 11721 LValue StrideLVal = CGF.EmitLValueForField( 11722 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11723 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11724 StrideLVal); 11725 } 11726 11727 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11728 // kmp_int32 num_dims, struct kmp_dim * dims); 11729 llvm::Value *Args[] = { 11730 emitUpdateLocation(CGF, D.getBeginLoc()), 11731 getThreadID(CGF, D.getBeginLoc()), 11732 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11733 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11734 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11735 CGM.VoidPtrTy)}; 11736 11737 llvm::FunctionCallee RTLFn = 11738 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 11739 CGF.EmitRuntimeCall(RTLFn, Args); 11740 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11741 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11742 llvm::FunctionCallee FiniRTLFn = 11743 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 11744 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11745 llvm::makeArrayRef(FiniArgs)); 11746 } 11747 11748 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11749 const OMPDependClause *C) { 11750 QualType Int64Ty = 11751 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11752 llvm::APInt 
Size(/*numBits=*/32, C->getNumLoops()); 11753 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11754 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11755 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11756 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11757 const Expr *CounterVal = C->getLoopData(I); 11758 assert(CounterVal); 11759 llvm::Value *CntVal = CGF.EmitScalarConversion( 11760 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11761 CounterVal->getExprLoc()); 11762 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11763 /*Volatile=*/false, Int64Ty); 11764 } 11765 llvm::Value *Args[] = { 11766 emitUpdateLocation(CGF, C->getBeginLoc()), 11767 getThreadID(CGF, C->getBeginLoc()), 11768 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11769 llvm::FunctionCallee RTLFn; 11770 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11771 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 11772 } else { 11773 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11774 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 11775 } 11776 CGF.EmitRuntimeCall(RTLFn, Args); 11777 } 11778 11779 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11780 llvm::FunctionCallee Callee, 11781 ArrayRef<llvm::Value *> Args) const { 11782 assert(Loc.isValid() && "Outlined function call location must be valid."); 11783 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11784 11785 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11786 if (Fn->doesNotThrow()) { 11787 CGF.EmitNounwindRuntimeCall(Fn, Args); 11788 return; 11789 } 11790 } 11791 CGF.EmitRuntimeCall(Callee, Args); 11792 } 11793 11794 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11795 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 11796 ArrayRef<llvm::Value *> Args) const { 11797 emitCall(CGF, Loc, OutlinedFn, Args); 11798 } 11799 11800 void 
CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11801 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11802 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11803 HasEmittedDeclareTargetRegion = true; 11804 } 11805 11806 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11807 const VarDecl *NativeParam, 11808 const VarDecl *TargetParam) const { 11809 return CGF.GetAddrOfLocalVar(NativeParam); 11810 } 11811 11812 namespace { 11813 /// Cleanup action for allocate support. 11814 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11815 public: 11816 static const int CleanupArgs = 3; 11817 11818 private: 11819 llvm::FunctionCallee RTLFn; 11820 llvm::Value *Args[CleanupArgs]; 11821 11822 public: 11823 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11824 ArrayRef<llvm::Value *> CallArgs) 11825 : RTLFn(RTLFn) { 11826 assert(CallArgs.size() == CleanupArgs && 11827 "Size of arguments does not match."); 11828 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11829 } 11830 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11831 if (!CGF.HaveInsertPoint()) 11832 return; 11833 CGF.EmitRuntimeCall(RTLFn, Args); 11834 } 11835 }; 11836 } // namespace 11837 11838 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11839 const VarDecl *VD) { 11840 if (!VD) 11841 return Address::invalid(); 11842 const VarDecl *CVD = VD->getCanonicalDecl(); 11843 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11844 return Address::invalid(); 11845 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11846 // Use the default allocation. 
11847 if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || 11848 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && 11849 !AA->getAllocator()) 11850 return Address::invalid(); 11851 llvm::Value *Size; 11852 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11853 if (CVD->getType()->isVariablyModifiedType()) { 11854 Size = CGF.getTypeSize(CVD->getType()); 11855 // Align the size: ((size + align - 1) / align) * align 11856 Size = CGF.Builder.CreateNUWAdd( 11857 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11858 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11859 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11860 } else { 11861 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11862 Size = CGM.getSize(Sz.alignTo(Align)); 11863 } 11864 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11865 assert(AA->getAllocator() && 11866 "Expected allocator expression for non-default allocator."); 11867 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11868 // According to the standard, the original allocator type is a enum (integer). 11869 // Convert to pointer type, if required. 
11870 if (Allocator->getType()->isIntegerTy()) 11871 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11872 else if (Allocator->getType()->isPointerTy()) 11873 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11874 CGM.VoidPtrTy); 11875 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11876 11877 llvm::Value *Addr = 11878 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11879 getName({CVD->getName(), ".void.addr"})); 11880 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11881 Allocator}; 11882 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11883 11884 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11885 llvm::makeArrayRef(FiniArgs)); 11886 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11887 Addr, 11888 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11889 getName({CVD->getName(), ".addr"})); 11890 return Address(Addr, Align); 11891 } 11892 11893 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11894 CodeGenModule &CGM, const OMPLoopDirective &S) 11895 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11896 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11897 if (!NeedToPush) 11898 return; 11899 NontemporalDeclsSet &DS = 11900 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11901 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11902 for (const Stmt *Ref : C->private_refs()) { 11903 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11904 const ValueDecl *VD; 11905 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11906 VD = DRE->getDecl(); 11907 } else { 11908 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11909 assert((ME->isImplicitCXXThis() || 11910 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11911 "Expected member of current class."); 11912 VD = 
ME->getMemberDecl(); 11913 } 11914 DS.insert(VD); 11915 } 11916 } 11917 } 11918 11919 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11920 if (!NeedToPush) 11921 return; 11922 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11923 } 11924 11925 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11926 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11927 11928 return llvm::any_of( 11929 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11930 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11931 } 11932 11933 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11934 const OMPExecutableDirective &S, 11935 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11936 const { 11937 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11938 // Vars in target/task regions must be excluded completely. 11939 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11940 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11941 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11942 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11943 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11944 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11945 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11946 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11947 } 11948 } 11949 // Exclude vars in private clauses. 
11950 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11951 for (const Expr *Ref : C->varlists()) { 11952 if (!Ref->getType()->isScalarType()) 11953 continue; 11954 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11955 if (!DRE) 11956 continue; 11957 NeedToCheckForLPCs.insert(DRE->getDecl()); 11958 } 11959 } 11960 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11961 for (const Expr *Ref : C->varlists()) { 11962 if (!Ref->getType()->isScalarType()) 11963 continue; 11964 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11965 if (!DRE) 11966 continue; 11967 NeedToCheckForLPCs.insert(DRE->getDecl()); 11968 } 11969 } 11970 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11971 for (const Expr *Ref : C->varlists()) { 11972 if (!Ref->getType()->isScalarType()) 11973 continue; 11974 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11975 if (!DRE) 11976 continue; 11977 NeedToCheckForLPCs.insert(DRE->getDecl()); 11978 } 11979 } 11980 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11981 for (const Expr *Ref : C->varlists()) { 11982 if (!Ref->getType()->isScalarType()) 11983 continue; 11984 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11985 if (!DRE) 11986 continue; 11987 NeedToCheckForLPCs.insert(DRE->getDecl()); 11988 } 11989 } 11990 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11991 for (const Expr *Ref : C->varlists()) { 11992 if (!Ref->getType()->isScalarType()) 11993 continue; 11994 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11995 if (!DRE) 11996 continue; 11997 NeedToCheckForLPCs.insert(DRE->getDecl()); 11998 } 11999 } 12000 for (const Decl *VD : NeedToCheckForLPCs) { 12001 for (const LastprivateConditionalData &Data : 12002 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 12003 if (Data.DeclToUniqueName.count(VD) > 0) { 12004 if (!Data.Disabled) 12005 
NeedToAddForLPCsAsDisabled.insert(VD); 12006 break; 12007 } 12008 } 12009 } 12010 } 12011 12012 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12013 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12014 : CGM(CGF.CGM), 12015 Action((CGM.getLangOpts().OpenMP >= 50 && 12016 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12017 [](const OMPLastprivateClause *C) { 12018 return C->getKind() == 12019 OMPC_LASTPRIVATE_conditional; 12020 })) 12021 ? ActionToDo::PushAsLastprivateConditional 12022 : ActionToDo::DoNotPush) { 12023 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12024 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 12025 return; 12026 assert(Action == ActionToDo::PushAsLastprivateConditional && 12027 "Expected a push action."); 12028 LastprivateConditionalData &Data = 12029 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12030 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 12031 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 12032 continue; 12033 12034 for (const Expr *Ref : C->varlists()) { 12035 Data.DeclToUniqueName.insert(std::make_pair( 12036 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 12037 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 12038 } 12039 } 12040 Data.IVLVal = IVLVal; 12041 Data.Fn = CGF.CurFn; 12042 } 12043 12044 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12045 CodeGenFunction &CGF, const OMPExecutableDirective &S) 12046 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 12047 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 12048 if (CGM.getLangOpts().OpenMP < 50) 12049 return; 12050 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 12051 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 12052 if (!NeedToAddForLPCsAsDisabled.empty()) { 12053 Action = ActionToDo::DisableLastprivateConditional; 12054 
LastprivateConditionalData &Data = 12055 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 12056 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 12057 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 12058 Data.Fn = CGF.CurFn; 12059 Data.Disabled = true; 12060 } 12061 } 12062 12063 CGOpenMPRuntime::LastprivateConditionalRAII 12064 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 12065 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 12066 return LastprivateConditionalRAII(CGF, S); 12067 } 12068 12069 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 12070 if (CGM.getLangOpts().OpenMP < 50) 12071 return; 12072 if (Action == ActionToDo::DisableLastprivateConditional) { 12073 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12074 "Expected list of disabled private vars."); 12075 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12076 } 12077 if (Action == ActionToDo::PushAsLastprivateConditional) { 12078 assert( 12079 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 12080 "Expected list of lastprivate conditional vars."); 12081 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 12082 } 12083 } 12084 12085 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 12086 const VarDecl *VD) { 12087 ASTContext &C = CGM.getContext(); 12088 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 12089 if (I == LastprivateConditionalToTypes.end()) 12090 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 12091 QualType NewType; 12092 const FieldDecl *VDField; 12093 const FieldDecl *FiredField; 12094 LValue BaseLVal; 12095 auto VI = I->getSecond().find(VD); 12096 if (VI == I->getSecond().end()) { 12097 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 12098 RD->startDefinition(); 12099 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 12100 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 12101 RD->completeDefinition(); 12102 NewType = C.getRecordType(RD); 12103 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12104 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12105 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12106 } else { 12107 NewType = std::get<0>(VI->getSecond()); 12108 VDField = std::get<1>(VI->getSecond()); 12109 FiredField = std::get<2>(VI->getSecond()); 12110 BaseLVal = std::get<3>(VI->getSecond()); 12111 } 12112 LValue FiredLVal = 12113 CGF.EmitLValueForField(BaseLVal, FiredField); 12114 CGF.EmitStoreOfScalar( 12115 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12116 FiredLVal); 12117 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12118 } 12119 12120 namespace { 12121 /// Checks if the lastprivate conditional variable is referenced in LHS. 12122 class LastprivateConditionalRefChecker final 12123 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12124 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12125 const Expr *FoundE = nullptr; 12126 const Decl *FoundD = nullptr; 12127 StringRef UniqueDeclName; 12128 LValue IVLVal; 12129 llvm::Function *FoundFn = nullptr; 12130 SourceLocation Loc; 12131 12132 public: 12133 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12134 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12135 llvm::reverse(LPM)) { 12136 auto It = D.DeclToUniqueName.find(E->getDecl()); 12137 if (It == D.DeclToUniqueName.end()) 12138 continue; 12139 if (D.Disabled) 12140 return false; 12141 FoundE = E; 12142 FoundD = E->getDecl()->getCanonicalDecl(); 12143 UniqueDeclName = It->second; 12144 IVLVal = D.IVLVal; 12145 FoundFn = D.Fn; 12146 break; 12147 } 12148 return FoundE == E; 12149 } 12150 bool VisitMemberExpr(const MemberExpr *E) { 12151 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12152 return false; 12153 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 12154 llvm::reverse(LPM)) { 12155 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12156 if (It == D.DeclToUniqueName.end()) 12157 continue; 12158 if (D.Disabled) 12159 return false; 12160 FoundE = E; 12161 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12162 UniqueDeclName = It->second; 12163 IVLVal = D.IVLVal; 12164 FoundFn = D.Fn; 12165 break; 12166 } 12167 return FoundE == E; 12168 } 12169 bool VisitStmt(const Stmt *S) { 12170 for (const Stmt *Child : S->children()) { 12171 if (!Child) 12172 continue; 12173 if (const auto *E = dyn_cast<Expr>(Child)) 12174 if (!E->isGLValue()) 12175 continue; 12176 if (Visit(Child)) 12177 return true; 12178 } 12179 return false; 12180 } 12181 explicit LastprivateConditionalRefChecker( 12182 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12183 : LPM(LPM) {} 12184 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12185 getFoundData() const { 12186 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12187 } 12188 }; 12189 } // namespace 12190 12191 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12192 LValue IVLVal, 12193 StringRef UniqueDeclName, 12194 LValue LVal, 12195 SourceLocation Loc) { 12196 // Last updated loop counter for the lastprivate conditional var. 12197 // int<xx> last_iv = 0; 12198 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12199 llvm::Constant *LastIV = 12200 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12201 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12202 IVLVal.getAlignment().getAsAlign()); 12203 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12204 12205 // Last value of the lastprivate conditional. 
12206 // decltype(priv_a) last_a; 12207 llvm::Constant *Last = getOrCreateInternalVariable( 12208 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12209 cast<llvm::GlobalVariable>(Last)->setAlignment( 12210 LVal.getAlignment().getAsAlign()); 12211 LValue LastLVal = 12212 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12213 12214 // Global loop counter. Required to handle inner parallel-for regions. 12215 // iv 12216 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12217 12218 // #pragma omp critical(a) 12219 // if (last_iv <= iv) { 12220 // last_iv = iv; 12221 // last_a = priv_a; 12222 // } 12223 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12224 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12225 Action.Enter(CGF); 12226 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12227 // (last_iv <= iv) ? Check if the variable is updated and store new 12228 // value in global var. 12229 llvm::Value *CmpRes; 12230 if (IVLVal.getType()->isSignedIntegerType()) { 12231 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12232 } else { 12233 assert(IVLVal.getType()->isUnsignedIntegerType() && 12234 "Loop iteration variable must be integer."); 12235 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12236 } 12237 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12238 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12239 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12240 // { 12241 CGF.EmitBlock(ThenBB); 12242 12243 // last_iv = iv; 12244 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12245 12246 // last_a = priv_a; 12247 switch (CGF.getEvaluationKind(LVal.getType())) { 12248 case TEK_Scalar: { 12249 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12250 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12251 break; 12252 } 12253 case TEK_Complex: { 12254 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12255 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 12256 break; 12257 } 12258 case TEK_Aggregate: 12259 llvm_unreachable( 12260 "Aggregates are not supported in lastprivate conditional."); 12261 } 12262 // } 12263 CGF.EmitBranch(ExitBB); 12264 // There is no need to emit line number for unconditional branch. 12265 (void)ApplyDebugLocation::CreateEmpty(CGF); 12266 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12267 }; 12268 12269 if (CGM.getLangOpts().OpenMPSimd) { 12270 // Do not emit as a critical region as no parallel region could be emitted. 12271 RegionCodeGenTy ThenRCG(CodeGen); 12272 ThenRCG(CGF); 12273 } else { 12274 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12275 } 12276 } 12277 12278 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12279 const Expr *LHS) { 12280 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12281 return; 12282 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12283 if (!Checker.Visit(LHS)) 12284 return; 12285 const Expr *FoundE; 12286 const Decl *FoundD; 12287 StringRef UniqueDeclName; 12288 LValue IVLVal; 12289 llvm::Function *FoundFn; 12290 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12291 Checker.getFoundData(); 12292 if (FoundFn != CGF.CurFn) { 12293 // Special codegen for inner parallel regions. 
12294 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12295 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12296 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12297 "Lastprivate conditional is not found in outer region."); 12298 QualType StructTy = std::get<0>(It->getSecond()); 12299 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12300 LValue PrivLVal = CGF.EmitLValue(FoundE); 12301 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12302 PrivLVal.getAddress(CGF), 12303 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12304 LValue BaseLVal = 12305 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12306 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12307 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12308 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12309 FiredLVal, llvm::AtomicOrdering::Unordered, 12310 /*IsVolatile=*/true, /*isInit=*/false); 12311 return; 12312 } 12313 12314 // Private address of the lastprivate conditional in the current context. 
12315 // priv_a 12316 LValue LVal = CGF.EmitLValue(FoundE); 12317 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12318 FoundE->getExprLoc()); 12319 } 12320 12321 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12322 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12323 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12324 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12325 return; 12326 auto Range = llvm::reverse(LastprivateConditionalStack); 12327 auto It = llvm::find_if( 12328 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12329 if (It == Range.end() || It->Fn != CGF.CurFn) 12330 return; 12331 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12332 assert(LPCI != LastprivateConditionalToTypes.end() && 12333 "Lastprivates must be registered already."); 12334 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12335 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12336 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12337 for (const auto &Pair : It->DeclToUniqueName) { 12338 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12339 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12340 continue; 12341 auto I = LPCI->getSecond().find(Pair.first); 12342 assert(I != LPCI->getSecond().end() && 12343 "Lastprivate must be rehistered already."); 12344 // bool Cmp = priv_a.Fired != 0; 12345 LValue BaseLVal = std::get<3>(I->getSecond()); 12346 LValue FiredLVal = 12347 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12348 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12349 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12350 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12351 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12352 // if (Cmp) { 12353 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12354 CGF.EmitBlock(ThenBB); 
12355 Address Addr = CGF.GetAddrOfLocalVar(VD); 12356 LValue LVal; 12357 if (VD->getType()->isReferenceType()) 12358 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12359 AlignmentSource::Decl); 12360 else 12361 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12362 AlignmentSource::Decl); 12363 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12364 D.getBeginLoc()); 12365 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12366 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12367 // } 12368 } 12369 } 12370 12371 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12372 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12373 SourceLocation Loc) { 12374 if (CGF.getLangOpts().OpenMP < 50) 12375 return; 12376 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12377 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12378 "Unknown lastprivate conditional variable."); 12379 StringRef UniqueName = It->second; 12380 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12381 // The variable was not updated in the region - exit. 
12382 if (!GV) 12383 return; 12384 LValue LPLVal = CGF.MakeAddrLValue( 12385 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12386 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12387 CGF.EmitStoreOfScalar(Res, PrivLVal); 12388 } 12389 12390 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12391 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12392 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12393 llvm_unreachable("Not supported in SIMD-only mode"); 12394 } 12395 12396 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12397 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12398 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12399 llvm_unreachable("Not supported in SIMD-only mode"); 12400 } 12401 12402 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12403 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12404 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12405 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12406 bool Tied, unsigned &NumberOfParts) { 12407 llvm_unreachable("Not supported in SIMD-only mode"); 12408 } 12409 12410 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12411 SourceLocation Loc, 12412 llvm::Function *OutlinedFn, 12413 ArrayRef<llvm::Value *> CapturedVars, 12414 const Expr *IfCond) { 12415 llvm_unreachable("Not supported in SIMD-only mode"); 12416 } 12417 12418 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12419 CodeGenFunction &CGF, StringRef CriticalName, 12420 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12421 const Expr *Hint) { 12422 llvm_unreachable("Not supported in SIMD-only mode"); 12423 } 12424 12425 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12426 const RegionCodeGenTy &MasterOpGen, 12427 SourceLocation Loc) { 12428 llvm_unreachable("Not supported in SIMD-only mode"); 12429 } 12430 12431 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12432 SourceLocation Loc) { 12433 llvm_unreachable("Not supported in SIMD-only mode"); 12434 } 12435 12436 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12437 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12438 SourceLocation Loc) { 12439 llvm_unreachable("Not supported in SIMD-only mode"); 12440 } 12441 12442 void CGOpenMPSIMDRuntime::emitSingleRegion( 12443 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12444 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12445 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12446 ArrayRef<const Expr *> AssignmentOps) { 12447 llvm_unreachable("Not supported in SIMD-only mode"); 12448 } 12449 12450 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12451 const RegionCodeGenTy &OrderedOpGen, 12452 SourceLocation Loc, 12453 bool IsThreads) { 12454 llvm_unreachable("Not supported in SIMD-only mode"); 12455 } 12456 12457 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12458 SourceLocation Loc, 12459 OpenMPDirectiveKind Kind, 12460 bool EmitChecks, 12461 bool ForceSimpleCall) { 12462 llvm_unreachable("Not supported in SIMD-only mode"); 12463 } 12464 12465 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12466 CodeGenFunction &CGF, SourceLocation Loc, 12467 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12468 bool Ordered, const DispatchRTInput &DispatchValues) { 12469 llvm_unreachable("Not supported in SIMD-only mode"); 12470 } 12471 12472 void CGOpenMPSIMDRuntime::emitForStaticInit( 12473 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12474 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12475 llvm_unreachable("Not supported in SIMD-only mode"); 12476 } 12477 12478 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12479 CodeGenFunction &CGF, SourceLocation Loc, 12480 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 12481 llvm_unreachable("Not supported in SIMD-only mode"); 12482 } 12483 12484 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12485 SourceLocation Loc, 12486 unsigned IVSize, 12487 bool IVSigned) { 12488 llvm_unreachable("Not supported in SIMD-only mode"); 12489 } 12490 12491 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12492 SourceLocation Loc, 12493 OpenMPDirectiveKind DKind) { 12494 llvm_unreachable("Not supported in SIMD-only mode"); 12495 } 12496 12497 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12498 SourceLocation Loc, 12499 unsigned IVSize, bool IVSigned, 12500 Address IL, Address LB, 12501 Address UB, Address ST) { 12502 llvm_unreachable("Not supported in SIMD-only mode"); 12503 } 12504 12505 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12506 llvm::Value *NumThreads, 12507 SourceLocation Loc) { 12508 llvm_unreachable("Not supported in SIMD-only mode"); 12509 } 12510 12511 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12512 ProcBindKind ProcBind, 12513 SourceLocation Loc) { 12514 llvm_unreachable("Not supported in SIMD-only mode"); 12515 } 12516 12517 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12518 const VarDecl *VD, 12519 Address VDAddr, 12520 SourceLocation Loc) { 12521 llvm_unreachable("Not supported in SIMD-only mode"); 12522 } 12523 12524 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12525 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12526 CodeGenFunction *CGF) { 12527 llvm_unreachable("Not supported in SIMD-only mode"); 12528 } 12529 12530 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12531 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12532 llvm_unreachable("Not supported in SIMD-only mode"); 12533 } 12534 12535 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12536 ArrayRef<const Expr *> 
Vars, 12537 SourceLocation Loc, 12538 llvm::AtomicOrdering AO) { 12539 llvm_unreachable("Not supported in SIMD-only mode"); 12540 } 12541 12542 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12543 const OMPExecutableDirective &D, 12544 llvm::Function *TaskFunction, 12545 QualType SharedsTy, Address Shareds, 12546 const Expr *IfCond, 12547 const OMPTaskDataTy &Data) { 12548 llvm_unreachable("Not supported in SIMD-only mode"); 12549 } 12550 12551 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12552 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12553 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12554 const Expr *IfCond, const OMPTaskDataTy &Data) { 12555 llvm_unreachable("Not supported in SIMD-only mode"); 12556 } 12557 12558 void CGOpenMPSIMDRuntime::emitReduction( 12559 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12560 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12561 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12562 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12563 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12564 ReductionOps, Options); 12565 } 12566 12567 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12568 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12569 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12570 llvm_unreachable("Not supported in SIMD-only mode"); 12571 } 12572 12573 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12574 SourceLocation Loc, 12575 bool IsWorksharingReduction) { 12576 llvm_unreachable("Not supported in SIMD-only mode"); 12577 } 12578 12579 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12580 SourceLocation Loc, 12581 ReductionCodeGen &RCG, 12582 unsigned N) { 12583 llvm_unreachable("Not supported in SIMD-only mode"); 12584 } 12585 
12586 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12587 SourceLocation Loc, 12588 llvm::Value *ReductionsPtr, 12589 LValue SharedLVal) { 12590 llvm_unreachable("Not supported in SIMD-only mode"); 12591 } 12592 12593 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12594 SourceLocation Loc) { 12595 llvm_unreachable("Not supported in SIMD-only mode"); 12596 } 12597 12598 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12599 CodeGenFunction &CGF, SourceLocation Loc, 12600 OpenMPDirectiveKind CancelRegion) { 12601 llvm_unreachable("Not supported in SIMD-only mode"); 12602 } 12603 12604 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12605 SourceLocation Loc, const Expr *IfCond, 12606 OpenMPDirectiveKind CancelRegion) { 12607 llvm_unreachable("Not supported in SIMD-only mode"); 12608 } 12609 12610 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12611 const OMPExecutableDirective &D, StringRef ParentName, 12612 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12613 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12614 llvm_unreachable("Not supported in SIMD-only mode"); 12615 } 12616 12617 void CGOpenMPSIMDRuntime::emitTargetCall( 12618 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12619 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12620 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12621 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12622 const OMPLoopDirective &D)> 12623 SizeEmitter) { 12624 llvm_unreachable("Not supported in SIMD-only mode"); 12625 } 12626 12627 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12628 llvm_unreachable("Not supported in SIMD-only mode"); 12629 } 12630 12631 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12632 llvm_unreachable("Not supported in SIMD-only mode"); 12633 } 12634 12635 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 
12636 return false; 12637 } 12638 12639 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12640 const OMPExecutableDirective &D, 12641 SourceLocation Loc, 12642 llvm::Function *OutlinedFn, 12643 ArrayRef<llvm::Value *> CapturedVars) { 12644 llvm_unreachable("Not supported in SIMD-only mode"); 12645 } 12646 12647 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12648 const Expr *NumTeams, 12649 const Expr *ThreadLimit, 12650 SourceLocation Loc) { 12651 llvm_unreachable("Not supported in SIMD-only mode"); 12652 } 12653 12654 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12655 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12656 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12657 llvm_unreachable("Not supported in SIMD-only mode"); 12658 } 12659 12660 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12661 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12662 const Expr *Device) { 12663 llvm_unreachable("Not supported in SIMD-only mode"); 12664 } 12665 12666 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12667 const OMPLoopDirective &D, 12668 ArrayRef<Expr *> NumIterations) { 12669 llvm_unreachable("Not supported in SIMD-only mode"); 12670 } 12671 12672 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12673 const OMPDependClause *C) { 12674 llvm_unreachable("Not supported in SIMD-only mode"); 12675 } 12676 12677 const VarDecl * 12678 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12679 const VarDecl *NativeParam) const { 12680 llvm_unreachable("Not supported in SIMD-only mode"); 12681 } 12682 12683 Address 12684 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12685 const VarDecl *NativeParam, 12686 const VarDecl *TargetParam) const { 12687 llvm_unreachable("Not supported in SIMD-only mode"); 12688 } 12689