//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Extends CodeGenFunction::CGCapturedStmtInfo (always constructed with the
/// CR_OpenMP kind) with the OpenMP-specific state declared below: the region
/// kind, the code generation callback, the directive kind and the cancel flag.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Creates region info bound to the captured statement \p CS.
  /// \param CS Captured statement forwarded to CGCapturedStmtInfo.
  /// \param RegionKind Kind of the region, later reported by getRegionKind().
  /// \param CodeGen Code generation callback stored for this region.
  /// \param Kind OpenMP directive kind, later reported by getDirectiveKind().
  /// \param HasCancel Value later reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Creates region info that is not bound to a captured statement; only the
  /// CR_OpenMP kind is forwarded to CGCapturedStmtInfo. The remaining
  /// parameters have the same meaning as in the constructor above.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting an untied-task switching point. The base
  /// implementation is intentionally a no-op; derived classes override it.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// \return The region kind passed at construction.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \return The OpenMP directive kind passed at construction.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \return The HasCancel flag passed at construction.
  bool hasCancel() const { return HasCancel; }

  /// Type check used by isa<>/cast<>/dyn_cast<>: any captured statement info
  /// whose kind is CR_OpenMP is treated as a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region (see CGOpenMPRegionKind).
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback supplied at construction.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive this region was created for.
  OpenMPDirectiveKind Kind;
  /// Cancel flag supplied at construction.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
107 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 108 public: 109 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 110 const RegionCodeGenTy &CodeGen, 111 OpenMPDirectiveKind Kind, bool HasCancel, 112 StringRef HelperName) 113 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 114 HasCancel), 115 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 116 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 117 } 118 119 /// Get a variable or parameter for storing global thread id 120 /// inside OpenMP construct. 121 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 122 123 /// Get the name of the capture helper. 124 StringRef getHelperName() const override { return HelperName; } 125 126 static bool classof(const CGCapturedStmtInfo *Info) { 127 return CGOpenMPRegionInfo::classof(Info) && 128 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 129 ParallelOutlinedRegion; 130 } 131 132 private: 133 /// A variable or parameter storing global thread id for OpenMP 134 /// constructs. 135 const VarDecl *ThreadIDVar; 136 StringRef HelperName; 137 }; 138 139 /// API for captured statement code generation in OpenMP constructs. 140 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 141 public: 142 class UntiedTaskActionTy final : public PrePostActionTy { 143 bool Untied; 144 const VarDecl *PartIDVar; 145 const RegionCodeGenTy UntiedCodeGen; 146 llvm::SwitchInst *UntiedSwitch = nullptr; 147 148 public: 149 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 150 const RegionCodeGenTy &UntiedCodeGen) 151 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 152 void Enter(CodeGenFunction &CGF) override { 153 if (Untied) { 154 // Emit task switching point. 
155 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 156 CGF.GetAddrOfLocalVar(PartIDVar), 157 PartIDVar->getType()->castAs<PointerType>()); 158 llvm::Value *Res = 159 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 160 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 161 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 162 CGF.EmitBlock(DoneBB); 163 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 164 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 165 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 166 CGF.Builder.GetInsertBlock()); 167 emitUntiedSwitch(CGF); 168 } 169 } 170 void emitUntiedSwitch(CodeGenFunction &CGF) const { 171 if (Untied) { 172 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 173 CGF.GetAddrOfLocalVar(PartIDVar), 174 PartIDVar->getType()->castAs<PointerType>()); 175 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 176 PartIdLVal); 177 UntiedCodeGen(CGF); 178 CodeGenFunction::JumpDest CurPoint = 179 CGF.getJumpDestInCurrentScope(".untied.next."); 180 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 181 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 182 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 183 CGF.Builder.GetInsertBlock()); 184 CGF.EmitBranchThroughCleanup(CurPoint); 185 CGF.EmitBlock(CurPoint.getBlock()); 186 } 187 } 188 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 189 }; 190 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 191 const VarDecl *ThreadIDVar, 192 const RegionCodeGenTy &CodeGen, 193 OpenMPDirectiveKind Kind, bool HasCancel, 194 const UntiedTaskActionTy &Action) 195 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 196 ThreadIDVar(ThreadIDVar), Action(Action) { 197 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 198 } 199 200 /// Get a variable or parameter for storing global thread id 201 /// inside OpenMP construct. 
202 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 203 204 /// Get an LValue for the current ThreadID variable. 205 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 206 207 /// Get the name of the capture helper. 208 StringRef getHelperName() const override { return ".omp_outlined."; } 209 210 void emitUntiedSwitch(CodeGenFunction &CGF) override { 211 Action.emitUntiedSwitch(CGF); 212 } 213 214 static bool classof(const CGCapturedStmtInfo *Info) { 215 return CGOpenMPRegionInfo::classof(Info) && 216 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 217 TaskOutlinedRegion; 218 } 219 220 private: 221 /// A variable or parameter storing global thread id for OpenMP 222 /// constructs. 223 const VarDecl *ThreadIDVar; 224 /// Action for emitting code for untied tasks. 225 const UntiedTaskActionTy &Action; 226 }; 227 228 /// API for inlined captured statement code generation in OpenMP 229 /// constructs. 230 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 231 public: 232 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 233 const RegionCodeGenTy &CodeGen, 234 OpenMPDirectiveKind Kind, bool HasCancel) 235 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 236 OldCSI(OldCSI), 237 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 238 239 // Retrieve the value of the context parameter. 240 llvm::Value *getContextValue() const override { 241 if (OuterRegionInfo) 242 return OuterRegionInfo->getContextValue(); 243 llvm_unreachable("No context value for inlined OpenMP region"); 244 } 245 246 void setContextValue(llvm::Value *V) override { 247 if (OuterRegionInfo) { 248 OuterRegionInfo->setContextValue(V); 249 return; 250 } 251 llvm_unreachable("No context value for inlined OpenMP region"); 252 } 253 254 /// Lookup the captured field decl for a variable. 
255 const FieldDecl *lookup(const VarDecl *VD) const override { 256 if (OuterRegionInfo) 257 return OuterRegionInfo->lookup(VD); 258 // If there is no outer outlined region,no need to lookup in a list of 259 // captured variables, we can use the original one. 260 return nullptr; 261 } 262 263 FieldDecl *getThisFieldDecl() const override { 264 if (OuterRegionInfo) 265 return OuterRegionInfo->getThisFieldDecl(); 266 return nullptr; 267 } 268 269 /// Get a variable or parameter for storing global thread id 270 /// inside OpenMP construct. 271 const VarDecl *getThreadIDVariable() const override { 272 if (OuterRegionInfo) 273 return OuterRegionInfo->getThreadIDVariable(); 274 return nullptr; 275 } 276 277 /// Get an LValue for the current ThreadID variable. 278 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 279 if (OuterRegionInfo) 280 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 281 llvm_unreachable("No LValue for inlined OpenMP construct"); 282 } 283 284 /// Get the name of the capture helper. 285 StringRef getHelperName() const override { 286 if (auto *OuterRegionInfo = getOldCSI()) 287 return OuterRegionInfo->getHelperName(); 288 llvm_unreachable("No helper name for inlined OpenMP construct"); 289 } 290 291 void emitUntiedSwitch(CodeGenFunction &CGF) override { 292 if (OuterRegionInfo) 293 OuterRegionInfo->emitUntiedSwitch(CGF); 294 } 295 296 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 297 298 static bool classof(const CGCapturedStmtInfo *Info) { 299 return CGOpenMPRegionInfo::classof(Info) && 300 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 301 } 302 303 ~CGOpenMPInlinedRegionInfo() override = default; 304 305 private: 306 /// CodeGen info about outer OpenMP region. 307 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 308 CGOpenMPRegionInfo *OuterRegionInfo; 309 }; 310 311 /// API for captured statement code generation in OpenMP target 312 /// constructs. 
For this captures, implicit parameters are used instead of the 313 /// captured fields. The name of the target region has to be unique in a given 314 /// application so it is provided by the client, because only the client has 315 /// the information to generate that. 316 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 317 public: 318 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 319 const RegionCodeGenTy &CodeGen, StringRef HelperName) 320 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 321 /*HasCancel=*/false), 322 HelperName(HelperName) {} 323 324 /// This is unused for target regions because each starts executing 325 /// with a single thread. 326 const VarDecl *getThreadIDVariable() const override { return nullptr; } 327 328 /// Get the name of the capture helper. 329 StringRef getHelperName() const override { return HelperName; } 330 331 static bool classof(const CGCapturedStmtInfo *Info) { 332 return CGOpenMPRegionInfo::classof(Info) && 333 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 334 } 335 336 private: 337 StringRef HelperName; 338 }; 339 340 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 341 llvm_unreachable("No codegen for expressions"); 342 } 343 /// API for generation of expressions captured in a innermost OpenMP 344 /// region. 345 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 346 public: 347 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 348 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 349 OMPD_unknown, 350 /*HasCancel=*/false), 351 PrivScope(CGF) { 352 // Make sure the globals captured in the provided statement are local by 353 // using the privatization logic. We assume the same variable is not 354 // captured more than once. 
355 for (const auto &C : CS.captures()) { 356 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 357 continue; 358 359 const VarDecl *VD = C.getCapturedVar(); 360 if (VD->isLocalVarDeclOrParm()) 361 continue; 362 363 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 364 /*RefersToEnclosingVariableOrCapture=*/false, 365 VD->getType().getNonReferenceType(), VK_LValue, 366 C.getLocation()); 367 PrivScope.addPrivate( 368 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 369 } 370 (void)PrivScope.Privatize(); 371 } 372 373 /// Lookup the captured field decl for a variable. 374 const FieldDecl *lookup(const VarDecl *VD) const override { 375 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 376 return FD; 377 return nullptr; 378 } 379 380 /// Emit the captured statement body. 381 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 382 llvm_unreachable("No body for expressions"); 383 } 384 385 /// Get a variable or parameter for storing global thread id 386 /// inside OpenMP construct. 387 const VarDecl *getThreadIDVariable() const override { 388 llvm_unreachable("No thread id for expressions"); 389 } 390 391 /// Get the name of the capture helper. 392 StringRef getHelperName() const override { 393 llvm_unreachable("No helper name for expressions"); 394 } 395 396 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 397 398 private: 399 /// Private scope to capture global variables. 400 CodeGenFunction::OMPPrivateScope PrivScope; 401 }; 402 403 /// RAII for emitting code of OpenMP constructs. 404 class InlinedOpenMPRegionRAII { 405 CodeGenFunction &CGF; 406 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 407 FieldDecl *LambdaThisCaptureField = nullptr; 408 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 409 410 public: 411 /// Constructs region for combined constructs. 412 /// \param CodeGen Code generation sequence for combined directives. 
Includes 413 /// a list of functions used for code generation of implicitly inlined 414 /// regions. 415 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 416 OpenMPDirectiveKind Kind, bool HasCancel) 417 : CGF(CGF) { 418 // Start emission for the construct. 419 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 420 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 421 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 422 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 423 CGF.LambdaThisCaptureField = nullptr; 424 BlockInfo = CGF.BlockInfo; 425 CGF.BlockInfo = nullptr; 426 } 427 428 ~InlinedOpenMPRegionRAII() { 429 // Restore original CapturedStmtInfo only if we're done with code emission. 430 auto *OldCSI = 431 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 432 delete CGF.CapturedStmtInfo; 433 CGF.CapturedStmtInfo = OldCSI; 434 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 435 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 436 CGF.BlockInfo = BlockInfo; 437 } 438 }; 439 440 /// Values for bit flags used in the ident_t to describe the fields. 441 /// All enumeric elements are named and described in accordance with the code 442 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 443 enum OpenMPLocationFlags : unsigned { 444 /// Use trampoline for internal microtask. 445 OMP_IDENT_IMD = 0x01, 446 /// Use c-style ident structure. 447 OMP_IDENT_KMPC = 0x02, 448 /// Atomic reduction option for kmpc_reduce. 449 OMP_ATOMIC_REDUCE = 0x10, 450 /// Explicit 'barrier' directive. 451 OMP_IDENT_BARRIER_EXPL = 0x20, 452 /// Implicit barrier in code. 453 OMP_IDENT_BARRIER_IMPL = 0x40, 454 /// Implicit barrier in 'for' directive. 455 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 456 /// Implicit barrier in 'sections' directive. 457 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 458 /// Implicit barrier in 'single' directive. 
459 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 460 /// Call of __kmp_for_static_init for static loop. 461 OMP_IDENT_WORK_LOOP = 0x200, 462 /// Call of __kmp_for_static_init for sections. 463 OMP_IDENT_WORK_SECTIONS = 0x400, 464 /// Call of __kmp_for_static_init for distribute. 465 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 466 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 467 }; 468 469 namespace { 470 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 471 /// Values for bit flags for marking which requires clauses have been used. 472 enum OpenMPOffloadingRequiresDirFlags : int64_t { 473 /// flag undefined. 474 OMP_REQ_UNDEFINED = 0x000, 475 /// no requires clause present. 476 OMP_REQ_NONE = 0x001, 477 /// reverse_offload clause. 478 OMP_REQ_REVERSE_OFFLOAD = 0x002, 479 /// unified_address clause. 480 OMP_REQ_UNIFIED_ADDRESS = 0x004, 481 /// unified_shared_memory clause. 482 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 483 /// dynamic_allocators clause. 484 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 485 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 486 }; 487 488 enum OpenMPOffloadingReservedDeviceIDs { 489 /// Device ID if the device was not defined, runtime should get it 490 /// from environment variables in the spec. 491 OMP_DEVICEID_UNDEF = -1, 492 }; 493 } // anonymous namespace 494 495 /// Describes ident structure that describes a source location. 
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule: an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the OpenMP and offloading runtime library entry points.
/// The comment on each enumerator gives the C signature of the corresponding
/// runtime function.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
///
/// Cleanup that calls Exit() on the wrapped action when it is emitted. The
/// action is held as a raw pointer and is never deleted here.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Emits the action's exit code, unless there is no insertion point to
  /// emit into.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Runs the region's code generation callback inside its own cleanups scope.
///
/// If a pre/post action is attached, a CleanupTy wrapping it is pushed as a
/// normal-and-EH cleanup before the callback runs, and the callback receives
/// that action. Otherwise the callback receives a default-constructed
/// PrePostActionTy.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
796 static const OMPDeclareReductionDecl * 797 getReductionInit(const Expr *ReductionOp) { 798 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 799 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 800 if (const auto *DRE = 801 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 802 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 803 return DRD; 804 return nullptr; 805 } 806 807 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 808 const OMPDeclareReductionDecl *DRD, 809 const Expr *InitOp, 810 Address Private, Address Original, 811 QualType Ty) { 812 if (DRD->getInitializer()) { 813 std::pair<llvm::Function *, llvm::Function *> Reduction = 814 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 815 const auto *CE = cast<CallExpr>(InitOp); 816 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 817 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 818 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 819 const auto *LHSDRE = 820 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 821 const auto *RHSDRE = 822 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 823 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 824 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 825 [=]() { return Private; }); 826 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 827 [=]() { return Original; }); 828 (void)PrivateScope.Privatize(); 829 RValue Func = RValue::get(Reduction.second); 830 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 831 CGF.EmitIgnoredExpr(InitOp); 832 } else { 833 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 834 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 835 auto *GV = new llvm::GlobalVariable( 836 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 837 llvm::GlobalValue::PrivateLinkage, Init, Name); 838 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 839 RValue InitRVal; 
840 switch (CGF.getEvaluationKind(Ty)) { 841 case TEK_Scalar: 842 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 843 break; 844 case TEK_Complex: 845 InitRVal = 846 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 847 break; 848 case TEK_Aggregate: 849 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 850 break; 851 } 852 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 853 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 854 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 855 /*IsInitializer=*/false); 856 } 857 } 858 859 /// Emit initialization of arrays of complex types. 860 /// \param DestAddr Address of the array. 861 /// \param Type Type of array. 862 /// \param Init Initial expression of array. 863 /// \param SrcAddr Address of the original array. 864 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 865 QualType Type, bool EmitDeclareReductionInit, 866 const Expr *Init, 867 const OMPDeclareReductionDecl *DRD, 868 Address SrcAddr = Address::invalid()) { 869 // Perform element-by-element initialization. 870 QualType ElementTy; 871 872 // Drill down to the base element type on both arrays. 873 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 874 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 875 DestAddr = 876 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 877 if (DRD) 878 SrcAddr = 879 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 880 881 llvm::Value *SrcBegin = nullptr; 882 if (DRD) 883 SrcBegin = SrcAddr.getPointer(); 884 llvm::Value *DestBegin = DestAddr.getPointer(); 885 // Cast from pointer to array type to pointer to single element. 886 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 887 // The basic structure here is a while-do loop. 
888 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 889 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 890 llvm::Value *IsEmpty = 891 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 892 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 893 894 // Enter the loop body, making that address the current address. 895 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 896 CGF.EmitBlock(BodyBB); 897 898 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 899 900 llvm::PHINode *SrcElementPHI = nullptr; 901 Address SrcElementCurrent = Address::invalid(); 902 if (DRD) { 903 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 904 "omp.arraycpy.srcElementPast"); 905 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 906 SrcElementCurrent = 907 Address(SrcElementPHI, 908 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 909 } 910 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 911 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 912 DestElementPHI->addIncoming(DestBegin, EntryBB); 913 Address DestElementCurrent = 914 Address(DestElementPHI, 915 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 916 917 // Emit copy. 918 { 919 CodeGenFunction::RunCleanupsScope InitScope(CGF); 920 if (EmitDeclareReductionInit) { 921 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 922 SrcElementCurrent, ElementTy); 923 } else 924 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 925 /*IsInitializer=*/false); 926 } 927 928 if (DRD) { 929 // Shift the address forward by one element. 930 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 931 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 932 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 933 } 934 935 // Shift the address forward by one element. 
936 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 937 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 938 // Check whether we've reached the end. 939 llvm::Value *Done = 940 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 941 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 942 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 943 944 // Done. 945 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 946 } 947 948 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 949 return CGF.EmitOMPSharedLValue(E); 950 } 951 952 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 953 const Expr *E) { 954 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 955 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 956 return LValue(); 957 } 958 959 void ReductionCodeGen::emitAggregateInitialization( 960 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 961 const OMPDeclareReductionDecl *DRD) { 962 // Emit VarDecl with copy init for arrays. 963 // Get the address of the original variable captured in current 964 // captured region. 965 const auto *PrivateVD = 966 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 967 bool EmitDeclareReductionInit = 968 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 969 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 970 EmitDeclareReductionInit, 971 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 972 : PrivateVD->getInit(), 973 DRD, SharedLVal.getAddress(CGF)); 974 } 975 976 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 977 ArrayRef<const Expr *> Privates, 978 ArrayRef<const Expr *> ReductionOps) { 979 ClausesData.reserve(Shareds.size()); 980 SharedAddresses.reserve(Shareds.size()); 981 Sizes.reserve(Shareds.size()); 982 BaseDecls.reserve(Shareds.size()); 983 auto IPriv = Privates.begin(); 984 auto IRed = ReductionOps.begin(); 985 for (const Expr *Ref : Shareds) { 986 ClausesData.emplace_back(Ref, *IPriv, *IRed); 987 std::advance(IPriv, 1); 988 std::advance(IRed, 1); 989 } 990 } 991 992 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 993 assert(SharedAddresses.size() == N && 994 "Number of generated lvalues must be exactly N."); 995 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 996 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 997 SharedAddresses.emplace_back(First, Second); 998 } 999 1000 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 1001 const auto *PrivateVD = 1002 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1003 QualType PrivateType = PrivateVD->getType(); 1004 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1005 if (!PrivateType->isVariablyModifiedType()) { 1006 Sizes.emplace_back( 1007 CGF.getTypeSize( 1008 SharedAddresses[N].first.getType().getNonReferenceType()), 1009 nullptr); 1010 return; 1011 } 1012 llvm::Value *Size; 1013 llvm::Value *SizeInChars; 1014 auto *ElemType = cast<llvm::PointerType>( 1015 SharedAddresses[N].first.getPointer(CGF)->getType()) 1016 ->getElementType(); 1017 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1018 if (AsArraySection) { 1019 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), 1020 SharedAddresses[N].first.getPointer(CGF)); 1021 Size = CGF.Builder.CreateNUWAdd( 1022 Size, 
llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 1023 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1024 } else { 1025 SizeInChars = CGF.getTypeSize( 1026 SharedAddresses[N].first.getType().getNonReferenceType()); 1027 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1028 } 1029 Sizes.emplace_back(SizeInChars, Size); 1030 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1031 CGF, 1032 cast<OpaqueValueExpr>( 1033 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1034 RValue::get(Size)); 1035 CGF.EmitVariablyModifiedType(PrivateType); 1036 } 1037 1038 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1039 llvm::Value *Size) { 1040 const auto *PrivateVD = 1041 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1042 QualType PrivateType = PrivateVD->getType(); 1043 if (!PrivateType->isVariablyModifiedType()) { 1044 assert(!Size && !Sizes[N].second && 1045 "Size should be nullptr for non-variably modified reduction " 1046 "items."); 1047 return; 1048 } 1049 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1050 CGF, 1051 cast<OpaqueValueExpr>( 1052 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1053 RValue::get(Size)); 1054 CGF.EmitVariablyModifiedType(PrivateType); 1055 } 1056 1057 void ReductionCodeGen::emitInitialization( 1058 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1059 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1060 assert(SharedAddresses.size() > N && "No variable was generated"); 1061 const auto *PrivateVD = 1062 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1063 const OMPDeclareReductionDecl *DRD = 1064 getReductionInit(ClausesData[N].ReductionOp); 1065 QualType PrivateType = PrivateVD->getType(); 1066 PrivateAddr = CGF.Builder.CreateElementBitCast( 1067 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1068 QualType SharedType = SharedAddresses[N].first.getType(); 1069 SharedLVal = 
CGF.MakeAddrLValue( 1070 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 1071 CGF.ConvertTypeForMem(SharedType)), 1072 SharedType, SharedAddresses[N].first.getBaseInfo(), 1073 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1074 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1075 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1076 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1077 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1078 PrivateAddr, SharedLVal.getAddress(CGF), 1079 SharedLVal.getType()); 1080 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1081 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1082 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1083 PrivateVD->getType().getQualifiers(), 1084 /*IsInitializer=*/false); 1085 } 1086 } 1087 1088 bool ReductionCodeGen::needCleanups(unsigned N) { 1089 const auto *PrivateVD = 1090 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1091 QualType PrivateType = PrivateVD->getType(); 1092 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1093 return DTorKind != QualType::DK_none; 1094 } 1095 1096 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1097 Address PrivateAddr) { 1098 const auto *PrivateVD = 1099 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1100 QualType PrivateType = PrivateVD->getType(); 1101 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1102 if (needCleanups(N)) { 1103 PrivateAddr = CGF.Builder.CreateElementBitCast( 1104 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1105 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1106 } 1107 } 1108 1109 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1110 LValue BaseLV) { 1111 BaseTy = BaseTy.getNonReferenceType(); 1112 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 
1113 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1114 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1115 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 1116 } else { 1117 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 1118 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1119 } 1120 BaseTy = BaseTy->getPointeeType(); 1121 } 1122 return CGF.MakeAddrLValue( 1123 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 1124 CGF.ConvertTypeForMem(ElTy)), 1125 BaseLV.getType(), BaseLV.getBaseInfo(), 1126 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1127 } 1128 1129 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1130 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1131 llvm::Value *Addr) { 1132 Address Tmp = Address::invalid(); 1133 Address TopTmp = Address::invalid(); 1134 Address MostTopTmp = Address::invalid(); 1135 BaseTy = BaseTy.getNonReferenceType(); 1136 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1137 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1138 Tmp = CGF.CreateMemTemp(BaseTy); 1139 if (TopTmp.isValid()) 1140 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1141 else 1142 MostTopTmp = Tmp; 1143 TopTmp = Tmp; 1144 BaseTy = BaseTy->getPointeeType(); 1145 } 1146 llvm::Type *Ty = BaseLVType; 1147 if (Tmp.isValid()) 1148 Ty = Tmp.getElementType(); 1149 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1150 if (Tmp.isValid()) { 1151 CGF.Builder.CreateStore(Addr, Tmp); 1152 return MostTopTmp; 1153 } 1154 return Address(Addr, BaseLVAlignment); 1155 } 1156 1157 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1158 const VarDecl *OrigVD = nullptr; 1159 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1160 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1161 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1162 Base = 
TempOASE->getBase()->IgnoreParenImpCasts(); 1163 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1164 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1165 DE = cast<DeclRefExpr>(Base); 1166 OrigVD = cast<VarDecl>(DE->getDecl()); 1167 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1168 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1169 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1170 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1171 DE = cast<DeclRefExpr>(Base); 1172 OrigVD = cast<VarDecl>(DE->getDecl()); 1173 } 1174 return OrigVD; 1175 } 1176 1177 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1178 Address PrivateAddr) { 1179 const DeclRefExpr *DE; 1180 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1181 BaseDecls.emplace_back(OrigVD); 1182 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1183 LValue BaseLValue = 1184 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1185 OriginalBaseLValue); 1186 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1187 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1188 llvm::Value *PrivatePointer = 1189 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1190 PrivateAddr.getPointer(), 1191 SharedAddresses[N].first.getAddress(CGF).getType()); 1192 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1193 return castToBase(CGF, OrigVD->getType(), 1194 SharedAddresses[N].first.getType(), 1195 OriginalBaseLValue.getAddress(CGF).getType(), 1196 OriginalBaseLValue.getAlignment(), Ptr); 1197 } 1198 BaseDecls.emplace_back( 1199 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1200 return PrivateAddr; 1201 } 1202 1203 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1204 const OMPDeclareReductionDecl *DRD = 1205 getReductionInit(ClausesData[N].ReductionOp); 1206 return DRD && DRD->getInitializer(); 1207 } 1208 1209 
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1210 return CGF.EmitLoadOfPointerLValue( 1211 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1212 getThreadIDVariable()->getType()->castAs<PointerType>()); 1213 } 1214 1215 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1216 if (!CGF.HaveInsertPoint()) 1217 return; 1218 // 1.2.2 OpenMP Language Terminology 1219 // Structured block - An executable statement with a single entry at the 1220 // top and a single exit at the bottom. 1221 // The point of exit cannot be a branch out of the structured block. 1222 // longjmp() and throw() must not violate the entry/exit criteria. 1223 CGF.EHStack.pushTerminate(); 1224 CodeGen(CGF); 1225 CGF.EHStack.popTerminate(); 1226 } 1227 1228 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1229 CodeGenFunction &CGF) { 1230 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1231 getThreadIDVariable()->getType(), 1232 AlignmentSource::Decl); 1233 } 1234 1235 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1236 QualType FieldTy) { 1237 auto *Field = FieldDecl::Create( 1238 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1239 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1240 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1241 Field->setAccess(AS_public); 1242 DC->addDecl(Field); 1243 return Field; 1244 } 1245 1246 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1247 StringRef Separator) 1248 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1249 OffloadEntriesInfoManager(CGM) { 1250 ASTContext &C = CGM.getContext(); 1251 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1252 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1253 RD->startDefinition(); 1254 // reserved_1 1255 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1256 // flags 1257 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 1258 // reserved_2 1259 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1260 // reserved_3 1261 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1262 // psource 1263 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1264 RD->completeDefinition(); 1265 IdentQTy = C.getRecordType(RD); 1266 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1267 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1268 1269 loadOffloadInfoMetadata(); 1270 } 1271 1272 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, 1273 const GlobalDecl &OldGD, 1274 llvm::GlobalValue *OrigAddr, 1275 bool IsForDefinition) { 1276 // Emit at least a definition for the aliasee if the the address of the 1277 // original function is requested. 1278 if (IsForDefinition || OrigAddr) 1279 (void)CGM.GetAddrOfGlobal(NewGD); 1280 StringRef NewMangledName = CGM.getMangledName(NewGD); 1281 llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); 1282 if (Addr && !Addr->isDeclaration()) { 1283 const auto *D = cast<FunctionDecl>(OldGD.getDecl()); 1284 const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD); 1285 llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); 1286 1287 // Create a reference to the named value. This ensures that it is emitted 1288 // if a deferred decl. 1289 llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); 1290 1291 // Create the new alias itself, but don't set a name yet. 
1292 auto *GA = 1293 llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); 1294 1295 if (OrigAddr) { 1296 assert(OrigAddr->isDeclaration() && "Expected declaration"); 1297 1298 GA->takeName(OrigAddr); 1299 OrigAddr->replaceAllUsesWith( 1300 llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); 1301 OrigAddr->eraseFromParent(); 1302 } else { 1303 GA->setName(CGM.getMangledName(OldGD)); 1304 } 1305 1306 // Set attributes which are particular to an alias; this is a 1307 // specialization of the attributes which may be set on a global function. 1308 if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || 1309 D->isWeakImported()) 1310 GA->setLinkage(llvm::Function::WeakAnyLinkage); 1311 1312 CGM.SetCommonAttributes(OldGD, GA); 1313 return true; 1314 } 1315 return false; 1316 } 1317 1318 void CGOpenMPRuntime::clear() { 1319 InternalVars.clear(); 1320 // Clean non-target variable declarations possibly used only in debug info. 1321 for (const auto &Data : EmittedNonTargetVariables) { 1322 if (!Data.getValue().pointsToAliveValue()) 1323 continue; 1324 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1325 if (!GV) 1326 continue; 1327 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1328 continue; 1329 GV->eraseFromParent(); 1330 } 1331 // Emit aliases for the deferred aliasees. 1332 for (const auto &Pair : DeferredVariantFunction) { 1333 StringRef MangledName = CGM.getMangledName(Pair.second.second); 1334 llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); 1335 // If not able to emit alias, just emit original declaration. 
1336 (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, 1337 /*IsForDefinition=*/false); 1338 } 1339 } 1340 1341 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1342 SmallString<128> Buffer; 1343 llvm::raw_svector_ostream OS(Buffer); 1344 StringRef Sep = FirstSeparator; 1345 for (StringRef Part : Parts) { 1346 OS << Sep << Part; 1347 Sep = Separator; 1348 } 1349 return std::string(OS.str()); 1350 } 1351 1352 static llvm::Function * 1353 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1354 const Expr *CombinerInitializer, const VarDecl *In, 1355 const VarDecl *Out, bool IsCombiner) { 1356 // void .omp_combiner.(Ty *in, Ty *out); 1357 ASTContext &C = CGM.getContext(); 1358 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1359 FunctionArgList Args; 1360 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1361 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1362 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1363 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1364 Args.push_back(&OmpOutParm); 1365 Args.push_back(&OmpInParm); 1366 const CGFunctionInfo &FnInfo = 1367 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1368 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1369 std::string Name = CGM.getOpenMPRuntime().getName( 1370 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1371 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1372 Name, &CGM.getModule()); 1373 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1374 if (CGM.getLangOpts().Optimize) { 1375 Fn->removeFnAttr(llvm::Attribute::NoInline); 1376 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1377 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1378 } 1379 CodeGenFunction CGF(CGM); 1380 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1381 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
1382 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1383 Out->getLocation()); 1384 CodeGenFunction::OMPPrivateScope Scope(CGF); 1385 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1386 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1387 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1388 .getAddress(CGF); 1389 }); 1390 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1391 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1392 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1393 .getAddress(CGF); 1394 }); 1395 (void)Scope.Privatize(); 1396 if (!IsCombiner && Out->hasInit() && 1397 !CGF.isTrivialInitializer(Out->getInit())) { 1398 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1399 Out->getType().getQualifiers(), 1400 /*IsInitializer=*/true); 1401 } 1402 if (CombinerInitializer) 1403 CGF.EmitIgnoredExpr(CombinerInitializer); 1404 Scope.ForceCleanup(); 1405 CGF.FinishFunction(); 1406 return Fn; 1407 } 1408 1409 void CGOpenMPRuntime::emitUserDefinedReduction( 1410 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1411 if (UDRMap.count(D) > 0) 1412 return; 1413 llvm::Function *Combiner = emitCombinerOrInitializer( 1414 CGM, D->getType(), D->getCombiner(), 1415 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1416 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1417 /*IsCombiner=*/true); 1418 llvm::Function *Initializer = nullptr; 1419 if (const Expr *Init = D->getInitializer()) { 1420 Initializer = emitCombinerOrInitializer( 1421 CGM, D->getType(), 1422 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? 
Init 1423 : nullptr, 1424 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1425 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1426 /*IsCombiner=*/false); 1427 } 1428 UDRMap.try_emplace(D, Combiner, Initializer); 1429 if (CGF) { 1430 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1431 Decls.second.push_back(D); 1432 } 1433 } 1434 1435 std::pair<llvm::Function *, llvm::Function *> 1436 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1437 auto I = UDRMap.find(D); 1438 if (I != UDRMap.end()) 1439 return I->second; 1440 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1441 return UDRMap.lookup(D); 1442 } 1443 1444 namespace { 1445 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1446 // Builder if one is present. 1447 struct PushAndPopStackRAII { 1448 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1449 bool HasCancel) 1450 : OMPBuilder(OMPBuilder) { 1451 if (!OMPBuilder) 1452 return; 1453 1454 // The following callback is the crucial part of clangs cleanup process. 1455 // 1456 // NOTE: 1457 // Once the OpenMPIRBuilder is used to create parallel regions (and 1458 // similar), the cancellation destination (Dest below) is determined via 1459 // IP. That means if we have variables to finalize we split the block at IP, 1460 // use the new block (=BB) as destination to build a JumpDest (via 1461 // getJumpDestInCurrentScope(BB)) which then is fed to 1462 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1463 // to push & pop an FinalizationInfo object. 1464 // The FiniCB will still be needed but at the point where the 1465 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 
1466 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1467 assert(IP.getBlock()->end() == IP.getPoint() && 1468 "Clang CG should cause non-terminated block!"); 1469 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1470 CGF.Builder.restoreIP(IP); 1471 CodeGenFunction::JumpDest Dest = 1472 CGF.getOMPCancelDestination(OMPD_parallel); 1473 CGF.EmitBranchThroughCleanup(Dest); 1474 }; 1475 1476 // TODO: Remove this once we emit parallel regions through the 1477 // OpenMPIRBuilder as it can do this setup internally. 1478 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1479 {FiniCB, OMPD_parallel, HasCancel}); 1480 OMPBuilder->pushFinalizationCB(std::move(FI)); 1481 } 1482 ~PushAndPopStackRAII() { 1483 if (OMPBuilder) 1484 OMPBuilder->popFinalizationCB(); 1485 } 1486 llvm::OpenMPIRBuilder *OMPBuilder; 1487 }; 1488 } // namespace 1489 1490 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1491 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1492 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1493 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1494 assert(ThreadIDVar->getType()->isPointerType() && 1495 "thread id variable must be of type kmp_int32 *"); 1496 CodeGenFunction CGF(CGM, true); 1497 bool HasCancel = false; 1498 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1499 HasCancel = OPD->hasCancel(); 1500 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1501 HasCancel = OPSD->hasCancel(); 1502 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1503 HasCancel = OPFD->hasCancel(); 1504 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1505 HasCancel = OPFD->hasCancel(); 1506 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1507 HasCancel = OPFD->hasCancel(); 1508 else if (const auto *OPFD = 1509 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1510 HasCancel = OPFD->hasCancel(); 1511 
else if (const auto *OPFD = 1512 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1513 HasCancel = OPFD->hasCancel(); 1514 1515 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1516 // parallel region to make cancellation barriers work properly. 1517 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1518 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1519 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1520 HasCancel, OutlinedHelperName); 1521 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1522 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1523 } 1524 1525 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1526 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1527 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1528 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1529 return emitParallelOrTeamsOutlinedFunction( 1530 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1531 } 1532 1533 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1534 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1535 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1536 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1537 return emitParallelOrTeamsOutlinedFunction( 1538 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1539 } 1540 1541 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1542 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1543 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1544 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1545 bool Tied, unsigned &NumberOfParts) { 1546 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1547 PrePostActionTy &) { 1548 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1549 llvm::Value *UpLoc = 
emitUpdateLocation(CGF, D.getBeginLoc()); 1550 llvm::Value *TaskArgs[] = { 1551 UpLoc, ThreadID, 1552 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1553 TaskTVar->getType()->castAs<PointerType>()) 1554 .getPointer(CGF)}; 1555 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1556 }; 1557 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1558 UntiedCodeGen); 1559 CodeGen.setAction(Action); 1560 assert(!ThreadIDVar->getType()->isPointerType() && 1561 "thread id variable must be of type kmp_int32 for tasks"); 1562 const OpenMPDirectiveKind Region = 1563 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1564 : OMPD_task; 1565 const CapturedStmt *CS = D.getCapturedStmt(Region); 1566 bool HasCancel = false; 1567 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1568 HasCancel = TD->hasCancel(); 1569 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1570 HasCancel = TD->hasCancel(); 1571 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1572 HasCancel = TD->hasCancel(); 1573 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1574 HasCancel = TD->hasCancel(); 1575 1576 CodeGenFunction CGF(CGM, true); 1577 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1578 InnermostKind, HasCancel, Action); 1579 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1580 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1581 if (!Tied) 1582 NumberOfParts = Action.getNumberOfParts(); 1583 return Res; 1584 } 1585 1586 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1587 const RecordDecl *RD, const CGRecordLayout &RL, 1588 ArrayRef<llvm::Constant *> Data) { 1589 llvm::StructType *StructTy = RL.getLLVMType(); 1590 unsigned PrevIdx = 0; 1591 ConstantInitBuilder CIBuilder(CGM); 1592 auto DI = Data.begin(); 1593 for (const FieldDecl *FD : RD->fields()) { 1594 unsigned Idx = 
RL.getLLVMFieldNo(FD); 1595 // Fill the alignment. 1596 for (unsigned I = PrevIdx; I < Idx; ++I) 1597 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1598 PrevIdx = Idx + 1; 1599 Fields.add(*DI); 1600 ++DI; 1601 } 1602 } 1603 1604 template <class... As> 1605 static llvm::GlobalVariable * 1606 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1607 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1608 As &&... Args) { 1609 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1610 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1611 ConstantInitBuilder CIBuilder(CGM); 1612 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1613 buildStructValue(Fields, CGM, RD, RL, Data); 1614 return Fields.finishAndCreateGlobal( 1615 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1616 std::forward<As>(Args)...); 1617 } 1618 1619 template <typename T> 1620 static void 1621 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1622 ArrayRef<llvm::Constant *> Data, 1623 T &Parent) { 1624 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1625 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1626 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1627 buildStructValue(Fields, CGM, RD, RL, Data); 1628 Fields.finishAndAddTo(Parent); 1629 } 1630 1631 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1632 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1633 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1634 FlagsTy FlagsKey(Flags, Reserved2Flags); 1635 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1636 if (!Entry) { 1637 if (!DefaultOpenMPPSource) { 1638 // Initialize default location for psource field of ident_t structure of 1639 // all ident_t objects. Format is ";file;function;line;column;;". 
1640 // Taken from 1641 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1642 DefaultOpenMPPSource = 1643 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1644 DefaultOpenMPPSource = 1645 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1646 } 1647 1648 llvm::Constant *Data[] = { 1649 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1650 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1651 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1652 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1653 llvm::GlobalValue *DefaultOpenMPLocation = 1654 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1655 llvm::GlobalValue::PrivateLinkage); 1656 DefaultOpenMPLocation->setUnnamedAddr( 1657 llvm::GlobalValue::UnnamedAddr::Global); 1658 1659 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1660 } 1661 return Address(Entry, Align); 1662 } 1663 1664 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1665 bool AtCurrentPoint) { 1666 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1667 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1668 1669 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1670 if (AtCurrentPoint) { 1671 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1672 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1673 } else { 1674 Elem.second.ServiceInsertPt = 1675 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1676 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1677 } 1678 } 1679 1680 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1681 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1682 if (Elem.second.ServiceInsertPt) { 1683 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1684 Elem.second.ServiceInsertPt = nullptr; 1685 Ptr->eraseFromParent(); 1686 } 1687 } 1688 1689 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  // Produces the ident_t pointer that is passed to runtime calls for \p Loc.
  // OMP_IDENT_KMPC is always added to \p Flags. Without debug info, or for an
  // invalid location, the shared default ident_t global is returned.
  // Otherwise a per-function ident_t temporary is used (created on first use
  // and initialized from the default location) and its psource field is
  // overwritten with a ";file;function;line;column;;" string for \p Loc.
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary at the service insert point rather than at the
    // current position; the guard restores the builder's insert point when
    // this scope ends.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location string is created once per raw source-location encoding and
  // cached in OpenMPDebugLocMap.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function field is left empty when the current declaration is not a
    // FunctionDecl.
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
1768 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1769 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1770 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1771 !CGF.getLangOpts().CXXExceptions || 1772 CGF.Builder.GetInsertBlock() == TopBlock || 1773 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1774 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1775 TopBlock || 1776 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1777 CGF.Builder.GetInsertBlock()) { 1778 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1779 // If value loaded in entry block, cache it and use it everywhere in 1780 // function. 1781 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1782 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1783 Elem.second.ThreadID = ThreadID; 1784 } 1785 return ThreadID; 1786 } 1787 } 1788 } 1789 1790 // This is not an outlined function region - need to call __kmpc_int32 1791 // kmpc_global_thread_num(ident_t *loc). 1792 // Generate thread id value and cache this value for use across the 1793 // function. 
1794 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1795 if (!Elem.second.ServiceInsertPt) 1796 setLocThreadIdInsertPt(CGF); 1797 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1798 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1799 llvm::CallInst *Call = CGF.Builder.CreateCall( 1800 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1801 emitUpdateLocation(CGF, Loc)); 1802 Call->setCallingConv(CGF.getRuntimeCC()); 1803 Elem.second.ThreadID = Call; 1804 return Call; 1805 } 1806 1807 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1808 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1809 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1810 clearLocThreadIdInsertPt(CGF); 1811 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1812 } 1813 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1814 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1815 UDRMap.erase(D); 1816 FunctionUDRMap.erase(CGF.CurFn); 1817 } 1818 auto I = FunctionUDMMap.find(CGF.CurFn); 1819 if (I != FunctionUDMMap.end()) { 1820 for(const auto *D : I->second) 1821 UDMMap.erase(D); 1822 FunctionUDMMap.erase(I); 1823 } 1824 LastprivateConditionalToTypes.erase(CGF.CurFn); 1825 } 1826 1827 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1828 return IdentTy->getPointerTo(); 1829 } 1830 1831 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1832 if (!Kmpc_MicroTy) { 1833 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1834 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1835 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1836 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1837 } 1838 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1839 } 1840 1841 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1842 llvm::FunctionCallee RTLFn = nullptr; 1843 switch (static_cast<OpenMPRTLFunction>(Function)) { 1844 case OMPRTL__kmpc_fork_call: { 1845 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1846 // microtask, ...); 1847 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1848 getKmpc_MicroPointerTy()}; 1849 auto *FnTy = 1850 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1851 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1852 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1853 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1854 llvm::LLVMContext &Ctx = F->getContext(); 1855 llvm::MDBuilder MDB(Ctx); 1856 // Annotate the callback behavior of the __kmpc_fork_call: 1857 // - The callback callee is argument number 2 (microtask). 1858 // - The first two arguments of the callback callee are unknown (-1). 1859 // - All variadic arguments to the __kmpc_fork_call are passed to the 1860 // callback callee. 
1861 F->addMetadata( 1862 llvm::LLVMContext::MD_callback, 1863 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1864 2, {-1, -1}, 1865 /* VarArgsArePassed */ true)})); 1866 } 1867 } 1868 break; 1869 } 1870 case OMPRTL__kmpc_global_thread_num: { 1871 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1872 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1873 auto *FnTy = 1874 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1875 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1876 break; 1877 } 1878 case OMPRTL__kmpc_threadprivate_cached: { 1879 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1880 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1881 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1882 CGM.VoidPtrTy, CGM.SizeTy, 1883 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1884 auto *FnTy = 1885 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1886 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1887 break; 1888 } 1889 case OMPRTL__kmpc_critical: { 1890 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1891 // kmp_critical_name *crit); 1892 llvm::Type *TypeParams[] = { 1893 getIdentTyPointerTy(), CGM.Int32Ty, 1894 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1895 auto *FnTy = 1896 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1897 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1898 break; 1899 } 1900 case OMPRTL__kmpc_critical_with_hint: { 1901 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1902 // kmp_critical_name *crit, uintptr_t hint); 1903 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1904 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1905 CGM.IntPtrTy}; 1906 auto *FnTy = 1907 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1908 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1909 break; 1910 } 1911 case OMPRTL__kmpc_threadprivate_register: { 1912 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1913 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1914 // typedef void *(*kmpc_ctor)(void *); 1915 auto *KmpcCtorTy = 1916 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1917 /*isVarArg*/ false)->getPointerTo(); 1918 // typedef void *(*kmpc_cctor)(void *, void *); 1919 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1920 auto *KmpcCopyCtorTy = 1921 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1922 /*isVarArg*/ false) 1923 ->getPointerTo(); 1924 // typedef void (*kmpc_dtor)(void *); 1925 auto *KmpcDtorTy = 1926 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1927 ->getPointerTo(); 1928 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1929 KmpcCopyCtorTy, KmpcDtorTy}; 1930 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1931 /*isVarArg*/ false); 1932 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1933 break; 1934 } 1935 case OMPRTL__kmpc_end_critical: { 1936 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1937 // kmp_critical_name *crit); 1938 llvm::Type *TypeParams[] = { 1939 getIdentTyPointerTy(), CGM.Int32Ty, 1940 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1941 auto *FnTy = 1942 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1943 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1944 break; 1945 } 1946 case OMPRTL__kmpc_cancel_barrier: { 1947 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1948 // global_tid); 1949 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1950 auto *FnTy = 1951 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1952 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1953 break; 1954 } 1955 case 
OMPRTL__kmpc_barrier: { 1956 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1957 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1958 auto *FnTy = 1959 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1960 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1961 break; 1962 } 1963 case OMPRTL__kmpc_for_static_fini: { 1964 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1965 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1966 auto *FnTy = 1967 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1968 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1969 break; 1970 } 1971 case OMPRTL__kmpc_push_num_threads: { 1972 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1973 // kmp_int32 num_threads) 1974 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1975 CGM.Int32Ty}; 1976 auto *FnTy = 1977 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1978 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1979 break; 1980 } 1981 case OMPRTL__kmpc_serialized_parallel: { 1982 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1983 // global_tid); 1984 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1985 auto *FnTy = 1986 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1987 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1988 break; 1989 } 1990 case OMPRTL__kmpc_end_serialized_parallel: { 1991 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1992 // global_tid); 1993 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1994 auto *FnTy = 1995 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1996 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1997 break; 1998 } 1999 case OMPRTL__kmpc_flush: { 2000 // Build void 
__kmpc_flush(ident_t *loc); 2001 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 2002 auto *FnTy = 2003 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2004 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 2005 break; 2006 } 2007 case OMPRTL__kmpc_master: { 2008 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 2009 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2010 auto *FnTy = 2011 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2012 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 2013 break; 2014 } 2015 case OMPRTL__kmpc_end_master: { 2016 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 2017 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2018 auto *FnTy = 2019 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2020 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 2021 break; 2022 } 2023 case OMPRTL__kmpc_omp_taskyield: { 2024 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 2025 // int end_part); 2026 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2027 auto *FnTy = 2028 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2029 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 2030 break; 2031 } 2032 case OMPRTL__kmpc_single: { 2033 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 2034 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2035 auto *FnTy = 2036 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2037 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 2038 break; 2039 } 2040 case OMPRTL__kmpc_end_single: { 2041 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 2042 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2043 auto *FnTy = 2044 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2045 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 2046 break; 2047 } 2048 case OMPRTL__kmpc_omp_task_alloc: { 2049 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2050 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2051 // kmp_routine_entry_t *task_entry); 2052 assert(KmpRoutineEntryPtrTy != nullptr && 2053 "Type kmp_routine_entry_t must be created."); 2054 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2055 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2056 // Return void * and then cast to particular kmp_task_t type. 2057 auto *FnTy = 2058 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2059 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2060 break; 2061 } 2062 case OMPRTL__kmpc_omp_target_task_alloc: { 2063 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2064 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2065 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2066 assert(KmpRoutineEntryPtrTy != nullptr && 2067 "Type kmp_routine_entry_t must be created."); 2068 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2069 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2070 CGM.Int64Ty}; 2071 // Return void * and then cast to particular kmp_task_t type. 
2072 auto *FnTy = 2073 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2074 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2075 break; 2076 } 2077 case OMPRTL__kmpc_omp_task: { 2078 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2079 // *new_task); 2080 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2081 CGM.VoidPtrTy}; 2082 auto *FnTy = 2083 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2084 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2085 break; 2086 } 2087 case OMPRTL__kmpc_copyprivate: { 2088 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2089 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2090 // kmp_int32 didit); 2091 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2092 auto *CpyFnTy = 2093 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2094 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2095 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2096 CGM.Int32Ty}; 2097 auto *FnTy = 2098 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2099 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2100 break; 2101 } 2102 case OMPRTL__kmpc_reduce: { 2103 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2104 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2105 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2106 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2107 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2108 /*isVarArg=*/false); 2109 llvm::Type *TypeParams[] = { 2110 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2111 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2112 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2113 auto *FnTy = 2114 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2115 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2116 break; 2117 } 2118 case OMPRTL__kmpc_reduce_nowait: { 2119 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2120 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2121 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2122 // *lck); 2123 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2124 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2125 /*isVarArg=*/false); 2126 llvm::Type *TypeParams[] = { 2127 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2128 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2129 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2130 auto *FnTy = 2131 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2132 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2133 break; 2134 } 2135 case OMPRTL__kmpc_end_reduce: { 2136 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2137 // kmp_critical_name *lck); 2138 llvm::Type *TypeParams[] = { 2139 getIdentTyPointerTy(), CGM.Int32Ty, 2140 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2141 auto *FnTy = 2142 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2143 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2144 break; 2145 } 2146 case OMPRTL__kmpc_end_reduce_nowait: { 2147 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2148 // kmp_critical_name *lck); 2149 llvm::Type *TypeParams[] = { 2150 getIdentTyPointerTy(), CGM.Int32Ty, 2151 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2152 auto *FnTy = 2153 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2154 RTLFn = 2155 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2156 break; 2157 } 2158 case OMPRTL__kmpc_omp_task_begin_if0: { 
2159 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2160 // *new_task); 2161 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2162 CGM.VoidPtrTy}; 2163 auto *FnTy = 2164 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2165 RTLFn = 2166 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2167 break; 2168 } 2169 case OMPRTL__kmpc_omp_task_complete_if0: { 2170 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2171 // *new_task); 2172 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2173 CGM.VoidPtrTy}; 2174 auto *FnTy = 2175 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2176 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2177 /*Name=*/"__kmpc_omp_task_complete_if0"); 2178 break; 2179 } 2180 case OMPRTL__kmpc_ordered: { 2181 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2182 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2183 auto *FnTy = 2184 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2185 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2186 break; 2187 } 2188 case OMPRTL__kmpc_end_ordered: { 2189 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2190 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2191 auto *FnTy = 2192 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2193 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2194 break; 2195 } 2196 case OMPRTL__kmpc_omp_taskwait: { 2197 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2198 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2199 auto *FnTy = 2200 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2201 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2202 break; 2203 } 2204 case OMPRTL__kmpc_taskgroup: { 2205 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2206 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2207 auto *FnTy = 2208 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2209 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2210 break; 2211 } 2212 case OMPRTL__kmpc_end_taskgroup: { 2213 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2214 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2215 auto *FnTy = 2216 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2217 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2218 break; 2219 } 2220 case OMPRTL__kmpc_push_proc_bind: { 2221 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2222 // int proc_bind) 2223 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2224 auto *FnTy = 2225 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2226 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2227 break; 2228 } 2229 case OMPRTL__kmpc_omp_task_with_deps: { 2230 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2231 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2232 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2233 llvm::Type *TypeParams[] = { 2234 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2235 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2236 auto *FnTy = 2237 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2238 RTLFn = 2239 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2240 break; 2241 } 2242 case OMPRTL__kmpc_omp_wait_deps: { 2243 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2244 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2245 // kmp_depend_info_t *noalias_dep_list); 2246 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2247 CGM.Int32Ty, CGM.VoidPtrTy, 2248 
CGM.Int32Ty, CGM.VoidPtrTy}; 2249 auto *FnTy = 2250 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2251 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2252 break; 2253 } 2254 case OMPRTL__kmpc_cancellationpoint: { 2255 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2256 // global_tid, kmp_int32 cncl_kind) 2257 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2258 auto *FnTy = 2259 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2260 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2261 break; 2262 } 2263 case OMPRTL__kmpc_cancel: { 2264 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2265 // kmp_int32 cncl_kind) 2266 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2267 auto *FnTy = 2268 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2269 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2270 break; 2271 } 2272 case OMPRTL__kmpc_push_num_teams: { 2273 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2274 // kmp_int32 num_teams, kmp_int32 num_threads) 2275 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2276 CGM.Int32Ty}; 2277 auto *FnTy = 2278 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2279 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2280 break; 2281 } 2282 case OMPRTL__kmpc_fork_teams: { 2283 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2284 // microtask, ...); 2285 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2286 getKmpc_MicroPointerTy()}; 2287 auto *FnTy = 2288 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2289 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2290 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2291 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2292 llvm::LLVMContext &Ctx = F->getContext(); 2293 llvm::MDBuilder MDB(Ctx); 2294 // Annotate the callback behavior of the __kmpc_fork_teams: 2295 // - The callback callee is argument number 2 (microtask). 2296 // - The first two arguments of the callback callee are unknown (-1). 2297 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2298 // callback callee. 2299 F->addMetadata( 2300 llvm::LLVMContext::MD_callback, 2301 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2302 2, {-1, -1}, 2303 /* VarArgsArePassed */ true)})); 2304 } 2305 } 2306 break; 2307 } 2308 case OMPRTL__kmpc_taskloop: { 2309 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2310 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2311 // sched, kmp_uint64 grainsize, void *task_dup); 2312 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2313 CGM.IntTy, 2314 CGM.VoidPtrTy, 2315 CGM.IntTy, 2316 CGM.Int64Ty->getPointerTo(), 2317 CGM.Int64Ty->getPointerTo(), 2318 CGM.Int64Ty, 2319 CGM.IntTy, 2320 CGM.IntTy, 2321 CGM.Int64Ty, 2322 CGM.VoidPtrTy}; 2323 auto *FnTy = 2324 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2325 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2326 break; 2327 } 2328 case OMPRTL__kmpc_doacross_init: { 2329 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2330 // num_dims, struct kmp_dim *dims); 2331 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2332 CGM.Int32Ty, 2333 CGM.Int32Ty, 2334 CGM.VoidPtrTy}; 2335 auto *FnTy = 2336 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2337 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2338 break; 2339 } 2340 case OMPRTL__kmpc_doacross_fini: { 2341 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2342 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2343 auto *FnTy 
= 2344 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2345 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2346 break; 2347 } 2348 case OMPRTL__kmpc_doacross_post: { 2349 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2350 // *vec); 2351 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2352 CGM.Int64Ty->getPointerTo()}; 2353 auto *FnTy = 2354 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2355 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2356 break; 2357 } 2358 case OMPRTL__kmpc_doacross_wait: { 2359 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2360 // *vec); 2361 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2362 CGM.Int64Ty->getPointerTo()}; 2363 auto *FnTy = 2364 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2365 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2366 break; 2367 } 2368 case OMPRTL__kmpc_task_reduction_init: { 2369 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2370 // *data); 2371 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2372 auto *FnTy = 2373 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2374 RTLFn = 2375 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2376 break; 2377 } 2378 case OMPRTL__kmpc_task_reduction_get_th_data: { 2379 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2380 // *d); 2381 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2382 auto *FnTy = 2383 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2384 RTLFn = CGM.CreateRuntimeFunction( 2385 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2386 break; 2387 } 2388 case OMPRTL__kmpc_alloc: { 2389 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2390 // 
al); omp_allocator_handle_t type is void *. 2391 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2392 auto *FnTy = 2393 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2394 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2395 break; 2396 } 2397 case OMPRTL__kmpc_free: { 2398 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2399 // al); omp_allocator_handle_t type is void *. 2400 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2401 auto *FnTy = 2402 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2403 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2404 break; 2405 } 2406 case OMPRTL__kmpc_push_target_tripcount: { 2407 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2408 // size); 2409 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2410 llvm::FunctionType *FnTy = 2411 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2412 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2413 break; 2414 } 2415 case OMPRTL__tgt_target: { 2416 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2417 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2418 // *arg_types); 2419 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2420 CGM.VoidPtrTy, 2421 CGM.Int32Ty, 2422 CGM.VoidPtrPtrTy, 2423 CGM.VoidPtrPtrTy, 2424 CGM.Int64Ty->getPointerTo(), 2425 CGM.Int64Ty->getPointerTo()}; 2426 auto *FnTy = 2427 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2428 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2429 break; 2430 } 2431 case OMPRTL__tgt_target_nowait: { 2432 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2433 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2434 // int64_t *arg_types); 2435 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2436 CGM.VoidPtrTy, 2437 
CGM.Int32Ty, 2438 CGM.VoidPtrPtrTy, 2439 CGM.VoidPtrPtrTy, 2440 CGM.Int64Ty->getPointerTo(), 2441 CGM.Int64Ty->getPointerTo()}; 2442 auto *FnTy = 2443 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2444 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2445 break; 2446 } 2447 case OMPRTL__tgt_target_teams: { 2448 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2449 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2450 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2451 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2452 CGM.VoidPtrTy, 2453 CGM.Int32Ty, 2454 CGM.VoidPtrPtrTy, 2455 CGM.VoidPtrPtrTy, 2456 CGM.Int64Ty->getPointerTo(), 2457 CGM.Int64Ty->getPointerTo(), 2458 CGM.Int32Ty, 2459 CGM.Int32Ty}; 2460 auto *FnTy = 2461 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2462 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2463 break; 2464 } 2465 case OMPRTL__tgt_target_teams_nowait: { 2466 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2467 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2468 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2469 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2470 CGM.VoidPtrTy, 2471 CGM.Int32Ty, 2472 CGM.VoidPtrPtrTy, 2473 CGM.VoidPtrPtrTy, 2474 CGM.Int64Ty->getPointerTo(), 2475 CGM.Int64Ty->getPointerTo(), 2476 CGM.Int32Ty, 2477 CGM.Int32Ty}; 2478 auto *FnTy = 2479 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2480 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2481 break; 2482 } 2483 case OMPRTL__tgt_register_requires: { 2484 // Build void __tgt_register_requires(int64_t flags); 2485 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2486 auto *FnTy = 2487 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2488 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2489 
break; 2490 } 2491 case OMPRTL__tgt_target_data_begin: { 2492 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2493 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2494 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2495 CGM.Int32Ty, 2496 CGM.VoidPtrPtrTy, 2497 CGM.VoidPtrPtrTy, 2498 CGM.Int64Ty->getPointerTo(), 2499 CGM.Int64Ty->getPointerTo()}; 2500 auto *FnTy = 2501 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2502 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2503 break; 2504 } 2505 case OMPRTL__tgt_target_data_begin_nowait: { 2506 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2507 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2508 // *arg_types); 2509 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2510 CGM.Int32Ty, 2511 CGM.VoidPtrPtrTy, 2512 CGM.VoidPtrPtrTy, 2513 CGM.Int64Ty->getPointerTo(), 2514 CGM.Int64Ty->getPointerTo()}; 2515 auto *FnTy = 2516 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2517 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2518 break; 2519 } 2520 case OMPRTL__tgt_target_data_end: { 2521 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2522 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2523 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2524 CGM.Int32Ty, 2525 CGM.VoidPtrPtrTy, 2526 CGM.VoidPtrPtrTy, 2527 CGM.Int64Ty->getPointerTo(), 2528 CGM.Int64Ty->getPointerTo()}; 2529 auto *FnTy = 2530 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2531 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2532 break; 2533 } 2534 case OMPRTL__tgt_target_data_end_nowait: { 2535 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2536 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2537 // *arg_types); 2538 llvm::Type *TypeParams[] = {CGM.Int64Ty, 
2539 CGM.Int32Ty, 2540 CGM.VoidPtrPtrTy, 2541 CGM.VoidPtrPtrTy, 2542 CGM.Int64Ty->getPointerTo(), 2543 CGM.Int64Ty->getPointerTo()}; 2544 auto *FnTy = 2545 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2546 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2547 break; 2548 } 2549 case OMPRTL__tgt_target_data_update: { 2550 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2551 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2552 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2553 CGM.Int32Ty, 2554 CGM.VoidPtrPtrTy, 2555 CGM.VoidPtrPtrTy, 2556 CGM.Int64Ty->getPointerTo(), 2557 CGM.Int64Ty->getPointerTo()}; 2558 auto *FnTy = 2559 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2560 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2561 break; 2562 } 2563 case OMPRTL__tgt_target_data_update_nowait: { 2564 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2565 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2566 // *arg_types); 2567 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2568 CGM.Int32Ty, 2569 CGM.VoidPtrPtrTy, 2570 CGM.VoidPtrPtrTy, 2571 CGM.Int64Ty->getPointerTo(), 2572 CGM.Int64Ty->getPointerTo()}; 2573 auto *FnTy = 2574 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2575 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2576 break; 2577 } 2578 case OMPRTL__tgt_mapper_num_components: { 2579 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2580 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2581 auto *FnTy = 2582 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2583 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2584 break; 2585 } 2586 case OMPRTL__tgt_push_mapper_component: { 2587 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2588 // *base, void *begin, 
int64_t size, int64_t type); 2589 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2590 CGM.Int64Ty, CGM.Int64Ty}; 2591 auto *FnTy = 2592 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2593 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2594 break; 2595 } 2596 } 2597 assert(RTLFn && "Unable to find OpenMP runtime function"); 2598 return RTLFn; 2599 } 2600 2601 llvm::FunctionCallee 2602 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2603 assert((IVSize == 32 || IVSize == 64) && 2604 "IV size is not compatible with the omp runtime"); 2605 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2606 : "__kmpc_for_static_init_4u") 2607 : (IVSigned ? "__kmpc_for_static_init_8" 2608 : "__kmpc_for_static_init_8u"); 2609 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2610 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2611 llvm::Type *TypeParams[] = { 2612 getIdentTyPointerTy(), // loc 2613 CGM.Int32Ty, // tid 2614 CGM.Int32Ty, // schedtype 2615 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2616 PtrTy, // p_lower 2617 PtrTy, // p_upper 2618 PtrTy, // p_stride 2619 ITy, // incr 2620 ITy // chunk 2621 }; 2622 auto *FnTy = 2623 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2624 return CGM.CreateRuntimeFunction(FnTy, Name); 2625 } 2626 2627 llvm::FunctionCallee 2628 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2629 assert((IVSize == 32 || IVSize == 64) && 2630 "IV size is not compatible with the omp runtime"); 2631 StringRef Name = 2632 IVSize == 32 2633 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2634 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2635 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2636 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2637 CGM.Int32Ty, // tid 2638 CGM.Int32Ty, // schedtype 2639 ITy, // lower 2640 ITy, // upper 2641 ITy, // stride 2642 ITy // chunk 2643 }; 2644 auto *FnTy = 2645 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2646 return CGM.CreateRuntimeFunction(FnTy, Name); 2647 } 2648 2649 llvm::FunctionCallee 2650 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2651 assert((IVSize == 32 || IVSize == 64) && 2652 "IV size is not compatible with the omp runtime"); 2653 StringRef Name = 2654 IVSize == 32 2655 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2656 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2657 llvm::Type *TypeParams[] = { 2658 getIdentTyPointerTy(), // loc 2659 CGM.Int32Ty, // tid 2660 }; 2661 auto *FnTy = 2662 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2663 return CGM.CreateRuntimeFunction(FnTy, Name); 2664 } 2665 2666 llvm::FunctionCallee 2667 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2668 assert((IVSize == 32 || IVSize == 64) && 2669 "IV size is not compatible with the omp runtime"); 2670 StringRef Name = 2671 IVSize == 32 2672 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2673 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2674 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2675 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2676 llvm::Type *TypeParams[] = { 2677 getIdentTyPointerTy(), // loc 2678 CGM.Int32Ty, // tid 2679 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2680 PtrTy, // p_lower 2681 PtrTy, // p_upper 2682 PtrTy // p_stride 2683 }; 2684 auto *FnTy = 2685 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2686 return CGM.CreateRuntimeFunction(FnTy, Name); 2687 } 2688 2689 /// Obtain information that uniquely identifies a target entry. This 2690 /// consists of the file and device IDs as well as line number associated with 2691 /// the relevant entry source location. 2692 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2693 unsigned &DeviceID, unsigned &FileID, 2694 unsigned &LineNum) { 2695 SourceManager &SM = C.getSourceManager(); 2696 2697 // The loc should be always valid and have a file ID (the user cannot use 2698 // #pragma directives in macros) 2699 2700 assert(Loc.isValid() && "Source location is expected to be always valid."); 2701 2702 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2703 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2704 2705 llvm::sys::fs::UniqueID ID; 2706 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2707 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2708 << PLoc.getFilename() << EC.message(); 2709 2710 DeviceID = ID.getDevice(); 2711 FileID = ID.getFile(); 2712 LineNum = PLoc.getLine(); 2713 } 2714 2715 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2716 if (CGM.getLangOpts().OpenMPSimd) 2717 return Address::invalid(); 2718 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2719 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2720 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2721 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2722 HasRequiresUnifiedSharedMemory))) { 2723 SmallString<64> PtrName; 2724 { 2725 
llvm::raw_svector_ostream OS(PtrName); 2726 OS << CGM.getMangledName(GlobalDecl(VD)); 2727 if (!VD->isExternallyVisible()) { 2728 unsigned DeviceID, FileID, Line; 2729 getTargetEntryUniqueInfo(CGM.getContext(), 2730 VD->getCanonicalDecl()->getBeginLoc(), 2731 DeviceID, FileID, Line); 2732 OS << llvm::format("_%x", FileID); 2733 } 2734 OS << "_decl_tgt_ref_ptr"; 2735 } 2736 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2737 if (!Ptr) { 2738 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2739 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2740 PtrName); 2741 2742 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2743 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2744 2745 if (!CGM.getLangOpts().OpenMPIsDevice) 2746 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2747 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2748 } 2749 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2750 } 2751 return Address::invalid(); 2752 } 2753 2754 llvm::Constant * 2755 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2756 assert(!CGM.getLangOpts().OpenMPUseTLS || 2757 !CGM.getContext().getTargetInfo().isTLSSupported()); 2758 // Lookup the entry, lazily creating it if necessary. 
2759 std::string Suffix = getName({"cache", ""}); 2760 return getOrCreateInternalVariable( 2761 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2762 } 2763 2764 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2765 const VarDecl *VD, 2766 Address VDAddr, 2767 SourceLocation Loc) { 2768 if (CGM.getLangOpts().OpenMPUseTLS && 2769 CGM.getContext().getTargetInfo().isTLSSupported()) 2770 return VDAddr; 2771 2772 llvm::Type *VarTy = VDAddr.getElementType(); 2773 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2774 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2775 CGM.Int8PtrTy), 2776 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2777 getOrCreateThreadPrivateCache(VD)}; 2778 return Address(CGF.EmitRuntimeCall( 2779 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2780 VDAddr.getAlignment()); 2781 } 2782 2783 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2784 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2785 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2786 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2787 // library. 2788 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2789 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2790 OMPLoc); 2791 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2792 // to register constructor/destructor for variable. 
/// Emits the constructor/destructor helpers for the threadprivate variable
/// \p VD and the code that registers them with the OpenMP runtime.
///
/// Nothing is emitted when TLS-based threadprivate is in effect, when \p VD
/// has no definition, when its mangled name was already recorded in
/// ThreadPrivateWithDefinition, or when neither a constructor nor a
/// destructor helper turned out to be needed.
///
/// \param VD Threadprivate variable; re-pointed to its definition.
/// \param VDAddr Address of the original variable.
/// \param Loc Location used for the generated functions and runtime calls.
/// \param PerformInit If true (and compiling C++), a helper that re-runs the
/// initializer is generated.
/// \param CGF If non-null, the registration calls are emitted into this
/// function; if null, a separate "__omp_threadprivate_init_" function is
/// created for them.
/// \return The newly created init function when \p CGF is null and
/// registration code was emitted; nullptr in every other case.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // With TLS-based threadprivate no runtime registration is emitted.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // The insert() succeeds only the first time a given mangled name is seen,
  // so the helpers below are generated at most once per variable.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit implies an initializer exists - confirm
      // against callers.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape void *ctor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer and view it as a pointer to the
      // variable's memory type before emitting the initializer into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // Reload the incoming pointer and store it as the helper's return
      // value.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      // The helper has the shape void dtor(void *dst).
      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      // NL stays alive until the end of this scope, so the function prologue
      // is emitted under an empty debug location.
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever helper was not generated is passed to the runtime as a null
    // function pointer of the matching type.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function was supplied: wrap the registration calls in a
      // dedicated nullary init function and hand that back to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise emit the registration directly into the caller's function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2927 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2928 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2929 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2930 HasRequiresUnifiedSharedMemory)) 2931 return CGM.getLangOpts().OpenMPIsDevice; 2932 VD = VD->getDefinition(CGM.getContext()); 2933 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2934 return CGM.getLangOpts().OpenMPIsDevice; 2935 2936 QualType ASTTy = VD->getType(); 2937 2938 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2939 // Produce the unique prefix to identify the new target regions. We use 2940 // the source location of the variable declaration which we know to not 2941 // conflict with any target region. 2942 unsigned DeviceID; 2943 unsigned FileID; 2944 unsigned Line; 2945 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2946 SmallString<128> Buffer, Out; 2947 { 2948 llvm::raw_svector_ostream OS(Buffer); 2949 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2950 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2951 } 2952 2953 const Expr *Init = VD->getAnyInitializer(); 2954 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2955 llvm::Constant *Ctor; 2956 llvm::Constant *ID; 2957 if (CGM.getLangOpts().OpenMPIsDevice) { 2958 // Generate function that re-emits the declaration's initializer into 2959 // the threadprivate copy of the variable VD 2960 CodeGenFunction CtorCGF(CGM); 2961 2962 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2963 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2964 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2965 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2966 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2967 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2968 FunctionArgList(), Loc, Loc); 2969 auto AL = 
ApplyDebugLocation::CreateArtificial(CtorCGF); 2970 CtorCGF.EmitAnyExprToMem(Init, 2971 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2972 Init->getType().getQualifiers(), 2973 /*IsInitializer=*/true); 2974 CtorCGF.FinishFunction(); 2975 Ctor = Fn; 2976 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2977 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2978 } else { 2979 Ctor = new llvm::GlobalVariable( 2980 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2981 llvm::GlobalValue::PrivateLinkage, 2982 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2983 ID = Ctor; 2984 } 2985 2986 // Register the information for the entry associated with the constructor. 2987 Out.clear(); 2988 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2989 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2990 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2991 } 2992 if (VD->getType().isDestructedType() != QualType::DK_none) { 2993 llvm::Constant *Dtor; 2994 llvm::Constant *ID; 2995 if (CGM.getLangOpts().OpenMPIsDevice) { 2996 // Generate function that emits destructor call for the threadprivate 2997 // copy of the variable VD 2998 CodeGenFunction DtorCGF(CGM); 2999 3000 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 3001 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3002 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 3003 FTy, Twine(Buffer, "_dtor"), FI, Loc); 3004 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 3005 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 3006 FunctionArgList(), Loc, Loc); 3007 // Create a scope with an artificial location for the body of this 3008 // function. 
3009 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 3010 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 3011 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 3012 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 3013 DtorCGF.FinishFunction(); 3014 Dtor = Fn; 3015 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 3016 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 3017 } else { 3018 Dtor = new llvm::GlobalVariable( 3019 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 3020 llvm::GlobalValue::PrivateLinkage, 3021 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 3022 ID = Dtor; 3023 } 3024 // Register the information for the entry associated with the destructor. 3025 Out.clear(); 3026 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 3027 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 3028 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 3029 } 3030 return CGM.getLangOpts().OpenMPIsDevice; 3031 } 3032 3033 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 3034 QualType VarType, 3035 StringRef Name) { 3036 std::string Suffix = getName({"artificial", ""}); 3037 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 3038 llvm::Value *GAddr = 3039 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 3040 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 3041 CGM.getTarget().isTLSSupported()) { 3042 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 3043 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 3044 } 3045 std::string CacheSuffix = getName({"cache", ""}); 3046 llvm::Value *Args[] = { 3047 emitUpdateLocation(CGF, SourceLocation()), 3048 getThreadID(CGF, SourceLocation()), 3049 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 3050 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 3051 /*isSigned=*/false), 3052 
getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit code for an OpenMP 'if' clause.
///
/// When \p Cond constant-folds, only the selected generator is run and no
/// branch is emitted. Otherwise a conditional branch on \p Cond is emitted to
/// the "omp_if.then" / "omp_if.else" blocks, which run \p ThenGen and
/// \p ElseGen respectively and both continue at "omp_if.end".
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty is discarded at the end
  // of this statement; if its effect is tied to its lifetime it does not cover
  // the EmitBlock/EmitBranch calls that follow -- confirm this is intended.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): same discarded-result pattern as above.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit the runtime call sequence that executes \p OutlinedFn for a
/// 'parallel' region.
///
/// The 'then' path calls __kmpc_fork_call with the location, the number of
/// captured variables, the outlined function cast to the microtask pointer
/// type, and \p CapturedVars as trailing arguments. The 'else' path calls the
/// outlined function directly between __kmpc_serialized_parallel and
/// __kmpc_end_serialized_parallel. With a null \p IfCond only the 'then' path
/// is emitted; otherwise the choice is delegated to emitIfClause. Nothing is
/// emitted when \p CGF has no insert point.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  // The location value is computed once here and captured by both generators.
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // Fixed leading arguments first, then the captured variables.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    // The second argument points at a temporary initialized to 0.
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    // Unlike the opening call, the closing call requests a fresh location
    // value instead of reusing RTLoc.
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: always take the fork path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an OpenMP region that provides a thread-ID variable: reuse the
  // address of that variable.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Otherwise store the thread ID into a fresh signed 32-bit temporary and
  // hand out the address of that temporary.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Return the variable registered under \p Name in InternalVars, creating it
/// in the current module on first use.
///
/// A newly created variable has type \p Ty, common linkage, a null
/// initializer, is not thread-local and is placed in \p AddressSpace. On a
/// repeated request the existing variable is returned; an assertion checks
/// that its pointee type equals \p Ty.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the Twine into a local buffer so it can be used as a map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  // try_emplace either finds the existing entry or inserts a null placeholder.
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // The global is named after the map entry's own key (Elem.first()) rather
  // than the local buffer.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Return the lock variable for the critical region \p CriticalName: an
/// internal variable of type KmpCriticalNameTy whose name is produced by
/// getName from the parts "gomp_critical_user_<CriticalName>" and "var".
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter emits a call to the "enter" runtime function and Exit a call to the
/// "exit" runtime function. When constructed with Conditional set, Enter also
/// guards the code that follows with a test of the enter call's result, and
/// Done must be called afterwards to close that guard.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  // Argument lists are held as ArrayRef; the callers visible in this file pass
  // local arrays that stay alive for as long as the action is used.
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block of the guard; only set by Enter when Conditional.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emit the enter call and, if conditional, branch to "omp_if.then" when
  /// its result is non-null and to "omp_if.end" otherwise.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Close the guard opened by a conditional Enter.
  /// NOTE(review): ContBlock is still null if Enter did not run in conditional
  /// mode; the callers visible in this file only call Done on actions built
  /// with Conditional=true.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emit the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region: \p CriticalOpGen bracketed by a call to
/// __kmpc_critical (or __kmpc_critical_with_hint when \p Hint is non-null)
/// and a call to __kmpc_end_critical, using the lock returned by
/// getCriticalRegionLock for \p CriticalName. Nothing is emitted when \p CGF
/// has no insert point.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call may carry one extra argument (the hint), so it gets its
  // own copy of the argument list; the exit call uses Args unchanged.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint is converted to IntPtrTy as an unsigned value.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: \p MasterOpGen runs only when __kmpc_master
/// returns non-null and is followed by __kmpc_end_master inside that guard.
/// Nothing is emitted when \p CGF has no insert point.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional guard opened by Action.Enter.
  Action.Done(CGF);
}

/// Emit a 'taskyield': through the OpenMPIRBuilder when one is available,
/// otherwise as a call to __kmpc_omp_taskyield(loc, thread_id, 0). Afterwards
/// the enclosing OpenMP region, if any, gets a chance to emit its untied-task
/// switch. Nothing is emitted when \p CGF has no insert point.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region: \p TaskgroupOpGen bracketed by calls to
/// __kmpc_taskgroup and __kmpc_end_taskgroup. Nothing is emitted when \p CGF
/// has no insert point.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// Loads element \p Index of \p Array and returns it as an address with the
/// declared alignment of \p Var, cast to the memory type of \p Var.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Build the internal helper "void copy_func(void *LHSArg, void *RHSArg)"
/// used for 'copyprivate'.
///
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers.
/// For every I below AssignmentOps.size() the generated body copies the
/// variable addressed by element I of the RHS array into the variable
/// addressed by element I of the LHS array, via EmitOMPCopy with
/// AssignmentOps[I] and the type of the declaration named by
/// CopyprivateVars[I].
/// \return the created function (internal linkage, marked as non-recursing).
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The helper body is emitted with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region, optionally followed by the 'copyprivate'
/// broadcast.
///
/// \p SingleOpGen runs only when __kmpc_single returns non-null and is
/// followed by __kmpc_end_single inside that guard. When \p CopyprivateVars
/// is non-empty, a "did_it" flag records whether this thread executed the
/// region, the addresses of the copyprivate variables are collected into a
/// temporary array, and __kmpc_copyprivate is called with that array, its
/// size, a generated copy function and the flag. All four expression lists
/// must have the same length (asserted). Nothing is emitted when \p CGF has
/// no insert point.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to copy; its validity is what
  // enables the copyprivate code below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done, i.e. before the guard is closed.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): this call binds this function's SrcExprs to the callee's
    // DestExprs parameter and DstExprs to its SrcExprs parameter. Verify
    // against the callers of emitSingleRegion which naming is the intended
    // one before touching the argument order.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// Emit an 'ordered' region. With \p IsThreads set, \p OrderedOpGen is
/// bracketed by calls to __kmpc_ordered and __kmpc_end_ordered; otherwise it
/// is emitted as a plain inlined directive with no runtime calls. Nothing is
/// emitted when \p CGF has no insert point.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Return the ident flags used for a barrier that belongs to directive
/// \p Kind: a dedicated implicit-barrier flag for 'for', 'sections' and
/// 'single', the explicit-barrier flag for 'barrier', and the generic
/// implicit-barrier flag for everything else.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Choose the default schedule for loop directive \p S.
///
/// If any 'ordered' clause of \p S has a non-null getNumForLoops(),
/// \p ScheduleKind is set to static and \p ChunkExpr to a new unsigned 32-bit
/// integer literal 1. Otherwise both outputs are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier for directive \p Kind.
///
/// When an OpenMPIRBuilder is available the barrier is created through it and
/// nothing else is done. Otherwise, inside an OpenMP region that has a cancel
/// construct and unless \p ForceSimpleCall is set, __kmpc_cancel_barrier is
/// called and, if \p EmitChecks is set, a non-null result branches through
/// cleanups to the cancel destination of the enclosing directive. In all other
/// cases a plain __kmpc_barrier call is emitted.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    // NOTE(review): this path runs before the HaveInsertPoint check below.
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      // The cancel barrier replaces the plain barrier call below.
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
///
/// \p Ordered selects the OMP_ord_* variant over the OMP_sch_* one. \p Chunked
/// only affects the static schedule; dynamic and guided always map to their
/// *_chunked values, and an unknown schedule kind is treated as non-chunked
/// static (asserting that no chunk was given).
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  // (ScheduleKind is therefore not consulted; only Chunked picks the result.)
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// Return true if the loop schedule described by \p ScheduleKind and
/// \p Chunked maps to the non-chunked static runtime schedule.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// Return true if the distribute schedule described by \p ScheduleKind and
/// \p Chunked maps to the non-chunked static distribute runtime schedule.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// Return true if the loop schedule described by \p ScheduleKind and
/// \p Chunked maps to the chunked static runtime schedule.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// Return true if the distribute schedule described by \p ScheduleKind and
/// \p Chunked maps to the chunked static distribute runtime schedule.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// Return true if \p ScheduleKind, taken as non-chunked and non-ordered, maps
/// to any runtime schedule other than OMP_sch_static.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Combine \p Schedule with the monotonic/nonmonotonic modifier implied by
/// \p M1 and \p M2.
///
/// \p M2 is processed after \p M1, so a monotonic/nonmonotonic value in M2
/// overrides one taken from M1. A 'simd' modifier turns
/// OMP_sch_static_chunked into OMP_sch_static_balanced_chunked. For
/// OpenMP >= 5.0 with no explicit modifier, every schedule other than the
/// static ones listed below defaults to nonmonotonic.
/// \return \p Schedule (possibly adjusted for 'simd') OR-ed with the selected
/// modifier, or with 0 if none was selected.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

/// Emit the dispatch-init runtime call that starts a dynamically scheduled
/// loop.
///
/// The schedule is derived from \p ScheduleKind, the presence of a chunk in
/// \p DispatchValues and \p Ordered, then combined with the schedule
/// modifiers. The stride is always 1 and a missing chunk defaults to 1, both
/// as integers of \p IVSize bits. Unless \p Ordered is set, the schedule must
/// not be one of the static ones (asserted). Nothing is emitted when \p CGF
/// has no insert point.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Emit a call to \p ForStaticInitFunction for a statically scheduled loop.
///
/// \p Schedule must be one of the static schedules and \p Values must not be
/// ordered (both asserted). A null chunk in \p Values is only accepted for the
/// non-chunked schedules and is replaced by 1; a non-null chunk is only
/// accepted for the chunked ones. The increment passed to the runtime is
/// always 1. Nothing is emitted when \p CGF has no insert point.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init runtime call for worksharing directive \p DKind.
///
/// The location is tagged as loop work for loop directives and as sections
/// work otherwise; \p DKind must be a worksharing directive (asserted). The
/// call itself is emitted by emitForStaticInitCall while an artificial debug
/// location for \p Loc is in effect.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // DL is a named local, so it stays alive until the end of this function.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init runtime call for a 'distribute' loop. The location is
/// tagged as distribute work and no schedule modifiers are applied.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the __kmpc_for_static_fini call that ends a statically scheduled
/// construct. The location is tagged as distribute, loop or sections work
/// depending on \p DKind (checked in that order). Nothing is emitted when
/// \p CGF has no insert point.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// Emit the dispatch-fini runtime call, selected by \p IVSize and
/// \p IVSigned, that ends an iteration of an ordered loop. Nothing is emitted
/// when \p CGF has no insert point.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit the dispatch-next runtime call, selected by \p IVSize and
/// \p IVSigned, passing the addresses \p IL, \p LB, \p UB and \p ST.
/// \return the call result converted from a signed 32-bit integer to bool.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit __kmpc_push_num_threads for a 'num_threads' clause. \p NumThreads is
/// converted to a signed 32-bit integer first. Nothing is emitted when \p CGF
/// has no insert point.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// Emit __kmpc_push_proc_bind for a 'proc_bind' clause, passing the numeric
/// value of \p ProcBind, which must not be OMP_PROC_BIND_unknown (asserted).
/// Nothing is emitted when \p CGF has no insert point.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// Emit a 'flush': through the OpenMPIRBuilder when one is available,
/// otherwise as a call to __kmpc_flush(loc). The unnamed expression list and
/// \p AO are not used by this implementation.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateFlush(CGF.Builder);
  } else {
    // The insert-point check only guards the runtime-call path.
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// Return true if neither target-region entries nor device-global-variable
/// entries have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
///
/// Stores a placeholder entry (null address and ID) with the given \p Order
/// under the (DeviceID, FileID, ParentName, LineNum) key and increments the
/// entry counter. Only valid when compiling for the device (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register the address, ID and flags of a target region.
///
/// When compiling for the device the entry must already exist and be
/// unregistered; otherwise an error diagnostic is reported and nothing is
/// changed. When compiling for the host a new entry is created, ordered by
/// the current entry counter, which is then incremented.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true if an entry exists for the given key and has not been
/// registered yet, i.e. it has neither an address nor an ID. The lookup uses
/// find at every level, so it never inserts into the maps.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
4005 for (const auto &D : OffloadEntriesTargetRegion) 4006 for (const auto &F : D.second) 4007 for (const auto &P : F.second) 4008 for (const auto &L : P.second) 4009 Action(D.first, F.first, P.first(), L.first, L.second); 4010 } 4011 4012 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4013 initializeDeviceGlobalVarEntryInfo(StringRef Name, 4014 OMPTargetGlobalVarEntryKind Flags, 4015 unsigned Order) { 4016 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 4017 "only required for the device " 4018 "code generation."); 4019 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 4020 ++OffloadingEntriesNum; 4021 } 4022 4023 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4024 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 4025 CharUnits VarSize, 4026 OMPTargetGlobalVarEntryKind Flags, 4027 llvm::GlobalValue::LinkageTypes Linkage) { 4028 if (CGM.getLangOpts().OpenMPIsDevice) { 4029 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 4030 assert(Entry.isValid() && Entry.getFlags() == Flags && 4031 "Entry not initialized!"); 4032 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 4033 "Resetting with the new address."); 4034 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 4035 if (Entry.getVarSize().isZero()) { 4036 Entry.setVarSize(VarSize); 4037 Entry.setLinkage(Linkage); 4038 } 4039 return; 4040 } 4041 Entry.setVarSize(VarSize); 4042 Entry.setLinkage(Linkage); 4043 Entry.setAddress(Addr); 4044 } else { 4045 if (hasDeviceGlobalVarEntryInfo(VarName)) { 4046 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 4047 assert(Entry.isValid() && Entry.getFlags() == Flags && 4048 "Entry not initialized!"); 4049 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 4050 "Resetting with the new address."); 4051 if (Entry.getVarSize().isZero()) { 4052 Entry.setVarSize(VarSize); 4053 Entry.setLinkage(Linkage); 4054 } 4055 return; 4056 } 4057 
OffloadEntriesDeviceGlobalVar.try_emplace( 4058 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 4059 ++OffloadingEntriesNum; 4060 } 4061 } 4062 4063 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4064 actOnDeviceGlobalVarEntriesInfo( 4065 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 4066 // Scan all target region entries and perform the provided action. 4067 for (const auto &E : OffloadEntriesDeviceGlobalVar) 4068 Action(E.getKey(), E.getValue()); 4069 } 4070 4071 void CGOpenMPRuntime::createOffloadEntry( 4072 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4073 llvm::GlobalValue::LinkageTypes Linkage) { 4074 StringRef Name = Addr->getName(); 4075 llvm::Module &M = CGM.getModule(); 4076 llvm::LLVMContext &C = M.getContext(); 4077 4078 // Create constant string with the name. 4079 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4080 4081 std::string StringName = getName({"omp_offloading", "entry_name"}); 4082 auto *Str = new llvm::GlobalVariable( 4083 M, StrPtrInit->getType(), /*isConstant=*/true, 4084 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4085 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4086 4087 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4088 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4089 llvm::ConstantInt::get(CGM.SizeTy, Size), 4090 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4091 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4092 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4093 llvm::GlobalVariable *Entry = createGlobalStruct( 4094 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4095 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4096 4097 // The entry has to be created in the section the linker expects it to be. 
Entry->setSection("omp_offloading_entries");
}

/// Emit the "omp_offload.info" named metadata and the offload entries for all
/// recorded target regions and device global variables.
///
/// Does nothing in simd-only mode or when no entries were recorded. Entries
/// are first collected into an array indexed by their order; then an offload
/// entry is created for each of them, or a diagnostic is reported when the
/// entry is incomplete.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are sized by the number of entries and indexed by
  // getOrder(); each tuple holds the entry, a source location for
  // diagnostics and a name (parent function or variable).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Look up the file with the matching device/file ID to build a source
        // location (column 1 of the entry's line) for later diagnostics. Loc
        // stays default-constructed when no file matches.
        // Note: inside this for statement `E` names the end iterator and
        // shadows the lambda parameter `E`; after the loop `E` is the entry
        // again.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit an offload entry for each
  // complete one.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      // Target regions are emitted with size 0.
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        // Link entries are only emitted on the host.
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  // Nothing to load when no host IR file was given.
  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // The host module is parsed into its own, function-local LLVM context.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Read operand Idx of the current node as an unsigned integer constant.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    // Read operand Idx of the current node as a metadata string.
    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands depend on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily build the kmp_routine_entry_t function pointer type, both as an AST
/// type (KmpRoutineEntryPtrQTy) and as an LLVM type (KmpRoutineEntryPtrTy).
/// Does nothing when KmpRoutineEntryPtrTy is already set.
///
/// \param KmpInt32Ty Type used for the return value and the first parameter.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// Return the AST type of struct __tgt_offload_entry, building and caching it
/// on first use.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it is a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
4366 // }; 4367 if (TgtOffloadEntryQTy.isNull()) { 4368 ASTContext &C = CGM.getContext(); 4369 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4370 RD->startDefinition(); 4371 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4372 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4373 addFieldToRecordDecl(C, RD, C.getSizeType()); 4374 addFieldToRecordDecl( 4375 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4376 addFieldToRecordDecl( 4377 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4378 RD->completeDefinition(); 4379 RD->addAttr(PackedAttr::CreateImplicit(C)); 4380 TgtOffloadEntryQTy = C.getRecordType(RD); 4381 } 4382 return TgtOffloadEntryQTy; 4383 } 4384 4385 namespace { 4386 struct PrivateHelpersTy { 4387 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4388 const VarDecl *PrivateElemInit) 4389 : Original(Original), PrivateCopy(PrivateCopy), 4390 PrivateElemInit(PrivateElemInit) {} 4391 const VarDecl *Original; 4392 const VarDecl *PrivateCopy; 4393 const VarDecl *PrivateElemInit; 4394 }; 4395 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4396 } // anonymous namespace 4397 4398 static RecordDecl * 4399 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4400 if (!Privates.empty()) { 4401 ASTContext &C = CGM.getContext(); 4402 // Build struct .kmp_privates_t. 
{ 4403 // /* private vars */ 4404 // }; 4405 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4406 RD->startDefinition(); 4407 for (const auto &Pair : Privates) { 4408 const VarDecl *VD = Pair.second.Original; 4409 QualType Type = VD->getType().getNonReferenceType(); 4410 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4411 if (VD->hasAttrs()) { 4412 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4413 E(VD->getAttrs().end()); 4414 I != E; ++I) 4415 FD->addAttr(*I); 4416 } 4417 } 4418 RD->completeDefinition(); 4419 return RD; 4420 } 4421 return nullptr; 4422 } 4423 4424 static RecordDecl * 4425 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4426 QualType KmpInt32Ty, 4427 QualType KmpRoutineEntryPointerQTy) { 4428 ASTContext &C = CGM.getContext(); 4429 // Build struct kmp_task_t { 4430 // void * shareds; 4431 // kmp_routine_entry_t routine; 4432 // kmp_int32 part_id; 4433 // kmp_cmplrdata_t data1; 4434 // kmp_cmplrdata_t data2; 4435 // For taskloops additional fields: 4436 // kmp_uint64 lb; 4437 // kmp_uint64 ub; 4438 // kmp_int64 st; 4439 // kmp_int32 liter; 4440 // void * reductions; 4441 // }; 4442 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4443 UD->startDefinition(); 4444 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4445 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4446 UD->completeDefinition(); 4447 QualType KmpCmplrdataTy = C.getRecordType(UD); 4448 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4449 RD->startDefinition(); 4450 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4451 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4452 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4453 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4454 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4455 if (isOpenMPTaskLoopDirective(Kind)) { 4456 QualType KmpUInt64Ty = 4457 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4458 QualType KmpInt64Ty = 4459 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4460 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4461 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4462 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4463 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4464 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4465 } 4466 RD->completeDefinition(); 4467 return RD; 4468 } 4469 4470 static RecordDecl * 4471 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4472 ArrayRef<PrivateDataTy> Privates) { 4473 ASTContext &C = CGM.getContext(); 4474 // Build struct kmp_task_t_with_privates { 4475 // kmp_task_t task_data; 4476 // .kmp_privates_t. privates; 4477 // }; 4478 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4479 RD->startDefinition(); 4480 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4481 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4482 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4483 RD->completeDefinition(); 4484 return RD; 4485 } 4486 4487 /// Emit a proxy function which accepts kmp_task_t as the second 4488 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // Proxy signature: (kmp_int32 gtid, kmp_task_t_with_privates *restrict tt).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is *tt; Base is its first field, the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // The shareds pointer is loaded and cast to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field of the wrapper record;
  // a null pointer is passed when it is absent.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloops additionally pass lb, ub, st, liter and reductions by value.
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit an internal function taking (kmp_int32 gtid, task *restrict tt) that
/// pushes a destroy cleanup for every field of the task's privates record
/// whose type needs destruction.
///
/// The privates record is taken to be the second field of
/// \p KmpTaskTWithPrivatesQTy.
/// NOTE(review): the second field is accessed unconditionally - presumably
/// callers only use this when a privates record exists; confirm.
/// \return The emitted function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the second field (the privates record) and rebase onto it.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Maps each variable to the index of its out-parameter in Args. Counting
  // starts at 1 because Args[0] is the pointer to the privates record.
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  // One "T **" out-parameter per private, firstprivate and lastprivate
  // variable, in that order.
  for (const Expr *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // With optimizations enabled the mapping function is marked always-inline.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  // Counter is reused as the index into Privates, which is walked in lockstep
  // with the fields of the privates record.
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
/// \param CGF Function in which the initialization code is emitted.
/// \param D Directive whose 'task' or 'taskloop' captured statement is used to
/// look up the captured fields of the original variables.
/// \param KmpTaskSharedsPtr Address of the shareds; it is only cast and used
/// as the source of firstprivate values under the condition checked below.
/// \param TDBase LValue of the task record described by
/// \p KmpTaskTWithPrivatesQTyRD; its second field is treated as the privates
/// record.
/// \param Privates Private descriptors, walked in lock-step with the fields of
/// the privates record.
/// \param ForDup If true, only copies whose initializer is a non-trivial
/// constructor call are initialized.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When emitting for the dup function, skip everything except non-trivial
    // constructor initializers.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the copy is initialized from the
      // value of the original variable.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          // Rebuild the lvalue with the declared alignment of the original
          // variable.
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array copy: map Elem to the original's address while the
          // initializer is emitted.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance to the next field even when nothing was emitted for this one.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
4858 static bool checkInitIsRequired(CodeGenFunction &CGF, 4859 ArrayRef<PrivateDataTy> Privates) { 4860 bool InitRequired = false; 4861 for (const PrivateDataTy &Pair : Privates) { 4862 const VarDecl *VD = Pair.second.PrivateCopy; 4863 const Expr *Init = VD->getAnyInitializer(); 4864 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4865 !CGF.isTrivialInitializer(Init)); 4866 if (InitRequired) 4867 break; 4868 } 4869 return InitRequired; 4870 } 4871 4872 4873 /// Emit task_dup function (for initialization of 4874 /// private/firstprivate/lastprivate vars and last_iter flag) 4875 /// \code 4876 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4877 /// lastpriv) { 4878 /// // setup lastprivate flag 4879 /// task_dst->last = lastpriv; 4880 /// // could be constructor calls here... 4881 /// } 4882 /// \endcode 4883 static llvm::Value * 4884 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4885 const OMPExecutableDirective &D, 4886 QualType KmpTaskTWithPrivatesPtrQTy, 4887 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4888 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4889 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4890 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4891 ASTContext &C = CGM.getContext(); 4892 FunctionArgList Args; 4893 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4894 KmpTaskTWithPrivatesPtrQTy, 4895 ImplicitParamDecl::Other); 4896 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4897 KmpTaskTWithPrivatesPtrQTy, 4898 ImplicitParamDecl::Other); 4899 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4900 ImplicitParamDecl::Other); 4901 Args.push_back(&DstArg); 4902 Args.push_back(&SrcArg); 4903 Args.push_back(&LastprivArg); 4904 const auto &TaskDupFnInfo = 4905 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4906 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4907 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4908 auto *TaskDup = llvm::Function::Create( 4909 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4910 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4911 TaskDup->setDoesNotRecurse(); 4912 CodeGenFunction CGF(CGM); 4913 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4914 Loc); 4915 4916 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4917 CGF.GetAddrOfLocalVar(&DstArg), 4918 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4919 // task_dst->liter = lastpriv; 4920 if (WithLastIter) { 4921 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4922 LValue Base = CGF.EmitLValueForField( 4923 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4924 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4925 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4926 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4927 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4928 } 4929 4930 // Emit initial values for private copies (if any). 4931 assert(!Privates.empty()); 4932 Address KmpTaskSharedsPtr = Address::invalid(); 4933 if (!Data.FirstprivateVars.empty()) { 4934 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4935 CGF.GetAddrOfLocalVar(&SrcArg), 4936 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4937 LValue Base = CGF.EmitLValueForField( 4938 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4939 KmpTaskSharedsPtr = Address( 4940 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4941 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4942 KmpTaskTShareds)), 4943 Loc), 4944 CGF.getNaturalTypeAlignment(SharedsTy)); 4945 } 4946 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4947 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4948 CGF.FinishFunction(); 4949 return TaskDup; 4950 } 4951 4952 /// Checks if destructor function is required to be generated. 
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  // Stop at the first field whose type requires destruction.
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Emit allocation and initialization of a task object for directive \p D.
///
/// Collects the private, firstprivate and lastprivate variables from \p Data,
/// builds the task record with privates, emits the privates mapping function
/// and the proxy task entry, calls the task allocation runtime function,
/// copies the shareds, initializes the private copies, and stores the
/// destructors function and priority into the task when they are needed.
///
/// \param TaskFunction Outlined task function; the type of its fourth
/// parameter is used as the type of the privates mapping function pointer.
/// \param SharedsTy Record type of the shareds.
/// \param Shareds Address the shareds are copied from.
/// \return The allocated task, the proxy task entry, the task pointer cast to
/// the task-with-privates type, the lvalue of the kmp_task_t data, the
/// kmp_task_t record declaration, and the task dup function if one was
/// emitted.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Descending alignment; the stable sort keeps clause order among entries
  // with equal alignment.
  // NOTE(review): the comparator takes both operands by value; const
  // references would avoid copying each pair per comparison.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // The record types for taskloop and for other task directives are cached
  // separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The mapping function pointer is cast to the type of the fourth parameter
  // of the outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The final flag is selected at run time when Data.Final carries a value;
  // otherwise it is folded from the integer part of Data.Final.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target task allocation entry point is called;
  // it takes the device id as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    // NOTE(review): this 'C' shadows the ASTContext 'C' declared above.
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A dup function is emitted only for taskloop directives that have
    // lastprivates or a private copy with a non-trivial constructor.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  // The union declaration obtained from the Data1 field is also used to find
  // the member inside the Data2 field.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Emit code for a task directive: allocate and initialize the task with
/// emitTaskInit, build the dependence array from \p Data (if any), and then
/// either enqueue the task through the runtime or, in the else branch of
/// \p IfCond, wait for the dependences and call the task entry directly
/// between the 'begin_if0' and 'complete_if0' runtime calls.
///
/// Does nothing if \p CGF has no insertion point.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
    // Field order of the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer with the bit width of 'bool'.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // The kmp_depend_info record is built once and cached in KmpDependInfoTy.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the size is the distance in bytes from the lower
        // bound to one element past the upper bound.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
            UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Pass the array to the runtime as a void pointer to its first element.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Filled in only when there are dependences; the no-alias list is always
  // passed as empty (count 0, null pointer).
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch: hand the task over to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks, store 0 into the part id field.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Else-branch: wait for the dependences, then call the task entry directly.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // Without an 'if' condition only the then-branch is emitted.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit code for a taskloop directive: allocate and initialize the task with
/// emitTaskInit, store the lower bound, upper bound, stride and reductions
/// pointer into the task record, and call __kmpc_taskloop.
///
/// Does nothing if \p CGF has no insertion point.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // The 'if' clause value is passed as an int; it is 1 when there is no
  // clause.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the bound and stride fields of the task record from the
  // initializers of the directive's bound and stride variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument; the choice depends on whether
  // Data.Schedule carries a value and on its integer part.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
5475 static void EmitOMPAggregateReduction( 5476 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5477 const VarDecl *RHSVar, 5478 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5479 const Expr *, const Expr *)> &RedOpGen, 5480 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5481 const Expr *UpExpr = nullptr) { 5482 // Perform element-by-element initialization. 5483 QualType ElementTy; 5484 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5485 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5486 5487 // Drill down to the base element type on both arrays. 5488 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5489 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5490 5491 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5492 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5493 // Cast from pointer to array type to pointer to single element. 5494 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5495 // The basic structure here is a while-do loop. 5496 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5497 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5498 llvm::Value *IsEmpty = 5499 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5500 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5501 5502 // Enter the loop body, making that address the current address. 
5503 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5504 CGF.EmitBlock(BodyBB); 5505 5506 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5507 5508 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5509 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5510 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5511 Address RHSElementCurrent = 5512 Address(RHSElementPHI, 5513 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5514 5515 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5516 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5517 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5518 Address LHSElementCurrent = 5519 Address(LHSElementPHI, 5520 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5521 5522 // Emit copy. 5523 CodeGenFunction::OMPPrivateScope Scope(CGF); 5524 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5525 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5526 Scope.Privatize(); 5527 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5528 Scope.ForceCleanup(); 5529 5530 // Shift the address forward by one element. 5531 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5532 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5533 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5534 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5535 // Check whether we've reached the end. 5536 llvm::Value *Done = 5537 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5538 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5539 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5540 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5541 5542 // Done. 5543 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5544 } 5545 5546 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
/// \param CGF Code generation state used for emission.
/// \param ReductionOp Combiner expression to emit; its value is ignored.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize a call whose callee is an opaque value wrapping a reference to
  // an OMPDeclareReductionDecl.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          // Bind the opaque callee to the first function of the pair for the
          // duration of the call emission.
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Any other shape: emit the expression as written.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the internal helper 'void reduction_func(void *LHSArg, void *RHSArg)'.
/// Both arguments are cast to \p ArgsType, the LHS/RHS variables of every
/// reduction are remapped to entries of those arrays, and each combiner in
/// \p ReductionOps is then emitted in order.
/// \param Loc Source location used for the generated function.
/// \param ArgsType LLVM type both 'void *' arguments are cast to.
/// \param Privates Private copies; their types select array vs. scalar
/// handling and mark variably modified items.
/// \param LHSExprs DeclRefExprs naming the LHS variables of the combiners.
/// \param RHSExprs DeclRefExprs naming the RHS variables of the combiners.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \return The newly created function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the argument arrays; it runs ahead of I because a
  // variably modified item occupies an extra slot holding its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The size travels as a pointer-typed slot; convert it back to size_t
      // and bind it to the VLA size expression.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the combiners with the remapped variables in effect.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner for a single reduction item: element by element through
/// EmitOMPAggregateReduction when \p PrivateRef has array type, otherwise as
/// one direct combiner emission.
/// \param CGF Code generation state used for emission.
/// \param ReductionOp Combiner expression.
/// \param PrivateRef Expression whose type decides array vs. non-array.
/// \param LHS Reference to the LHS variable of the combiner.
/// \param RHS Reference to the RHS variable of the combiner.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code for a reduction: either just the combiners (when
/// Options.SimpleReduction is set) or the full sequence built around
/// __kmpc_reduce{_nowait} that is sketched in the comment at the top of the
/// body. Does nothing if \p CGF has no insert point.
/// \param CGF Code generation state of the enclosing function.
/// \param Loc Source location used for runtime calls and generated code.
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs DeclRefExprs naming the LHS variables of the combiners.
/// \param RHSExprs DeclRefExprs naming the RHS variables of the combiners.
/// \param ReductionOps Combiner expressions, one per reduction item.
/// \param Options WithNowait selects the _nowait runtime entry points;
/// SimpleReduction selects the combiner-only form.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it runs ahead of I whenever a size slot is
  // inserted after a variably modified item.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The size is smuggled through the void* slot as an integer-to-pointer
      // cast.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No enter callee is supplied; the end-reduce runtime function is passed
  // as the exit callee.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose 'x = <update>' into its parts; XExpr stays null when the
      // combiner is not a plain assignment.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: the 'BO' declared in the condition below is a BinaryOperator
      // pointer that shadows the opcode variable above within this 'if'.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Callback: store the supplied value of X into a temporary
                // that stands in for VD, then evaluate the update expression
                // against it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end-reduce call is attached to the atomic case.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
/// \param CGM Module used to obtain the runtime's name builder and mangled
/// names.
/// \param Prefix Text placed in front of the generated name.
/// \param Ref Expression naming the variable the name is derived from.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  // If getBaseDecl yields no declaration, Ref itself must be a DeclRefExpr to
  // a variable.
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Local variables and parameters use their plain name, everything else the
  // mangled name.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the generated function.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \return The generated function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer argument: the address of the item to initialize.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: pass a null pointer as the shared item.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the generated function.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \param ReductionOp Combiner expression.
/// \param LHS DeclRefExpr naming the in/out variable of the combiner.
/// \param RHS DeclRefExpr naming the input variable of the combiner.
/// \param PrivateRef Expression whose type decides array vs. non-array
/// handling of the combiner.
/// \return The generated function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the generated function.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \return The generated function, or nullptr when item \p N needs no
/// cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer argument: the address of the item to destroy.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Builds an array of kmp_task_red_input_t descriptors, one per entry of
/// Data.ReductionVars, and passes it to __kmpc_task_reduction_init.
/// \param CGF Code generation state of the enclosing function.
/// \param Loc Source location used for generated code.
/// \param LHSExprs LHS variable references handed to the combiner functions.
/// \param RHSExprs RHS variable references handed to the combiner functions.
/// \param Data Task data supplying ReductionVars, ReductionCopies and
/// ReductionOps.
/// \return Result of the runtime call, or nullptr when there is no insert
/// point or no reduction variable.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size of the item in chars, as size_t.
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null finalizer is stored when no cleanup function was generated.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 1 when delayed creation is needed, otherwise 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

/// Stores the values that the generated init/comb/fini functions read back
/// from artificial threadprivate variables: the dynamic size of reduction
/// item \p N (if it has one) and the address of the original item (if a
/// custom initializer is used).
/// \param CGF Code generation state of the enclosing function.
/// \param Loc Source location (not used by the visible body).
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Emits a call to __kmpc_task_reduction_get_th_data and wraps the returned
/// pointer in an Address carrying the alignment of \p SharedLVal.
/// \param CGF Code generation state of the enclosing function.
/// \param Loc Source location used to obtain the thread id.
/// \param ReductionsPtr Value passed as the 'tg' argument of the runtime call.
/// \param SharedLVal LValue of the shared item; its pointer is passed as 'd'.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emits a taskwait: through the OpenMPIRBuilder when the module provides
/// one, otherwise as a direct call to __kmpc_omp_taskwait. Afterwards lets an
/// enclosing OpenMP region emit its untied-task switch. Does nothing if
/// \p CGF has no insert point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body produced by \p CodeGen inline while an
/// InlinedOpenMPRegionRAII object for \p InnerKind is alive. Does nothing if
/// \p CGF has no insert point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kinds produced by getCancellationKind.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind of a cancel region to its RTCancelKind value.
/// Anything other than parallel, for and sections is asserted to be
/// taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
6412 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6413 llvm::Value *Args[] = { 6414 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6415 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6416 // Ignore return result until untied tasks are supported. 6417 llvm::Value *Result = CGF.EmitRuntimeCall( 6418 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6419 // if (__kmpc_cancellationpoint()) { 6420 // exit from construct; 6421 // } 6422 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6423 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6424 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6425 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6426 CGF.EmitBlock(ExitBB); 6427 // exit from construct; 6428 CodeGenFunction::JumpDest CancelDest = 6429 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6430 CGF.EmitBranchThroughCleanup(CancelDest); 6431 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6432 } 6433 } 6434 } 6435 6436 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6437 const Expr *IfCond, 6438 OpenMPDirectiveKind CancelRegion) { 6439 if (!CGF.HaveInsertPoint()) 6440 return; 6441 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6442 // kmp_int32 cncl_kind); 6443 if (auto *OMPRegionInfo = 6444 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6445 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6446 PrePostActionTy &) { 6447 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6448 llvm::Value *Args[] = { 6449 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6450 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6451 // Ignore return result until untied tasks are supported. 
6452 llvm::Value *Result = CGF.EmitRuntimeCall( 6453 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6454 // if (__kmpc_cancel()) { 6455 // exit from construct; 6456 // } 6457 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6458 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6459 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6460 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6461 CGF.EmitBlock(ExitBB); 6462 // exit from construct; 6463 CodeGenFunction::JumpDest CancelDest = 6464 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6465 CGF.EmitBranchThroughCleanup(CancelDest); 6466 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6467 }; 6468 if (IfCond) { 6469 emitIfClause(CGF, IfCond, ThenGen, 6470 [](CodeGenFunction &, PrePostActionTy &) {}); 6471 } else { 6472 RegionCodeGenTy ThenRCG(ThenGen); 6473 ThenRCG(CGF); 6474 } 6475 } 6476 } 6477 6478 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6479 const OMPExecutableDirective &D, StringRef ParentName, 6480 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6481 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6482 assert(!ParentName.empty() && "Invalid target region parent name!"); 6483 HasEmittedTargetRegion = true; 6484 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6485 IsOffloadEntry, CodeGen); 6486 } 6487 6488 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6489 const OMPExecutableDirective &D, StringRef ParentName, 6490 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6491 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6492 // Create a unique name for the entry function using the source location 6493 // information of the current target region. 
  // The name will be something like:
  //
  //   __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // The stream is scoped so that it is destroyed before EntryFnName is used.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The outlined function is generated with its own CodeGenFunction, using a
  // target region info that carries the code generator and the entry name.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. In that case
  // we also give the outlined function weak (WeakAny) linkage and clear its
  // dso_local flag, because these functions will be entry points to the device.
  // NOTE(review): this comment used to say "external linkage", but the code
  // below sets WeakAnyLinkage - confirm which one is intended.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: the ID is a constant, zero-initialized i8 global with weak
    // linkage whose name is derived from the entry function name.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression can be skipped when looking for the single
/// significant child of a compound statement.
/// \return true if \p E has no side effects (possible side effects included)
/// and is either evaluatable or free of non-trivial function calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Find the single significant statement nested in \p Body.
///
/// Containers around \p Body are stripped first. While the current statement
/// is a compound statement, its children are scanned and the following are
/// ignored: trivial expressions (see isTrivial), asm and null statements,
/// flush/barrier/taskyield directives, and declaration statements made up only
/// of ignorable declarations.
/// \return The only remaining statement (with containers stripped), or nullptr
/// if a compound statement has more than one significant child or none at all.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    // Reset the candidate; it is set again by the first significant child.
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // The declaration statement is ignorable only if every declaration in
        // it is.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // These declaration kinds are always ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              // Any other non-variable declaration is significant.
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // A variable is ignorable if it is constexpr, or if its type is
              // trivial or a reference and it has no initializer or a trivial
              // one.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the single child; the loop continues if it is itself a
    // compound statement.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': the answer depends on the single significant statement
    // nested in the region, if there is one.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression with an inner-expression
          // region info installed for the captured statement.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams directive without a num_teams clause.
        return Bld.getInt32(0);
      }
      // Nested parallel or simd directive: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested executable directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target teams directives: the num_teams clause, if present, is
    // on the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct is involved: a single team.
    return Bld.getInt32(1);
  // The remaining kinds are not handled by this function; they fall through
  // to the llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Compute the number of threads implied by the single significant statement
/// nested in \p CS (see CGOpenMPRuntime::getSingleCompoundChild).
///
/// - Nested parallel-based directive: the num_threads value, limited to
///   \p DefaultThreadLimitVal when that is non-null; without a num_threads
///   clause, \p DefaultThreadLimitVal or constant 0 when it is null. An
///   applicable if clause turns the result into 1 when the condition is false.
/// - Nested simd-based directive: constant 1.
/// - Any other nested executable directive: \p DefaultThreadLimitVal, which
///   may be null.
/// - No single nested executable directive: \p DefaultThreadLimitVal, or
///   constant 0 when it is null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the first if clause that has no name modifier or the 'parallel'
        // modifier.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition known at compile time: a false condition means one
            // thread; a true condition needs no run-time select.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Run-time condition: emit the clause's pre-init declarations
            // before evaluating it.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Declarations marked OMPCaptureNoInit only get their
                  // alloca and cleanups, no initialization.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if
      // clause was not specified or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Unsigned minimum of the default thread limit and num_threads.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    // Some other executable directive: hand back the default unchanged (this
    // may be null).
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': first try the directly nested directive. ThreadLimitVal
    // is still null at this point.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Evaluate the nested thread_limit expression, emitting the clause's
        // pre-init declarations first.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A teams directive that is not combined with distribute: step into its
      // region and look at its single nested directive (Dir may become null).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A distribute directive without simd: step into its region and let
      // getNumThreads inspect what is nested there.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // The thread_limit clause, if present, is on the directive itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    // Otherwise look one level deeper, but only through a plain 'distribute'.
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // The result of getNumThreads is returned as is; it is null when the
    // nested directive is neither parallel- nor simd-based and there is no
    // thread_limit clause.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined directives with a parallel part: the if, thread_limit and
    // num_threads clauses are all on the directive itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the first if clause that has no name modifier or the 'parallel'
      // modifier.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Compile-time false condition: one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With both clauses present, use the unsigned minimum of num_threads
      // and thread_limit; otherwise num_threads alone.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    // A run-time if condition selects between the computed value and 1.
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // The remaining kinds are not handled by this function; they fall through
  // to the llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
7077 OMP_MAP_TARGET_PARAM = 0x20, 7078 /// Signal that the runtime library has to return the device pointer 7079 /// in the current position for the data being mapped. Used when we have the 7080 /// use_device_ptr clause. 7081 OMP_MAP_RETURN_PARAM = 0x40, 7082 /// This flag signals that the reference being passed is a pointer to 7083 /// private data. 7084 OMP_MAP_PRIVATE = 0x80, 7085 /// Pass the element to the device by value. 7086 OMP_MAP_LITERAL = 0x100, 7087 /// Implicit map 7088 OMP_MAP_IMPLICIT = 0x200, 7089 /// Close is a hint to the runtime to allocate memory close to 7090 /// the target device. 7091 OMP_MAP_CLOSE = 0x400, 7092 /// The 16 MSBs of the flags indicate whether the entry is member of some 7093 /// struct/class. 7094 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7095 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7096 }; 7097 7098 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7099 static unsigned getFlagMemberOffset() { 7100 unsigned Offset = 0; 7101 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7102 Remain = Remain >> 1) 7103 Offset++; 7104 return Offset; 7105 } 7106 7107 /// Class that associates information with a base pointer to be passed to the 7108 /// runtime library. 7109 class BasePointerInfo { 7110 /// The base pointer. 7111 llvm::Value *Ptr = nullptr; 7112 /// The base declaration that refers to this device pointer, or null if 7113 /// there is none. 
7114 const ValueDecl *DevPtrDecl = nullptr; 7115 7116 public: 7117 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7118 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7119 llvm::Value *operator*() const { return Ptr; } 7120 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7121 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7122 }; 7123 7124 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7125 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7126 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7127 7128 /// Map between a struct and the its lowest & highest elements which have been 7129 /// mapped. 7130 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7131 /// HE(FieldIndex, Pointer)} 7132 struct StructRangeInfoTy { 7133 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7134 0, Address::invalid()}; 7135 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7136 0, Address::invalid()}; 7137 Address Base = Address::invalid(); 7138 }; 7139 7140 private: 7141 /// Kind that defines how a device pointer has to be returned. 
7142 struct MapInfo { 7143 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7144 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7145 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7146 bool ReturnDevicePointer = false; 7147 bool IsImplicit = false; 7148 7149 MapInfo() = default; 7150 MapInfo( 7151 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7152 OpenMPMapClauseKind MapType, 7153 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7154 bool ReturnDevicePointer, bool IsImplicit) 7155 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7156 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7157 }; 7158 7159 /// If use_device_ptr is used on a pointer which is a struct member and there 7160 /// is no map information about it, then emission of that entry is deferred 7161 /// until the whole struct has been processed. 7162 struct DeferredDevicePtrEntryTy { 7163 const Expr *IE = nullptr; 7164 const ValueDecl *VD = nullptr; 7165 7166 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7167 : IE(IE), VD(VD) {} 7168 }; 7169 7170 /// The target directive from where the mappable clauses were extracted. It 7171 /// is either a executable directive or a user-defined mapper directive. 7172 llvm::PointerUnion<const OMPExecutableDirective *, 7173 const OMPDeclareMapperDecl *> 7174 CurDir; 7175 7176 /// Function the directive is being generated for. 7177 CodeGenFunction &CGF; 7178 7179 /// Set of all first private variables in the current directive. 7180 /// bool data is set to true if the variable is implicitly marked as 7181 /// firstprivate, false otherwise. 7182 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7183 7184 /// Map between device pointer declarations and their expression components. 7185 /// The key value for declarations in 'this' is null. 
7186 llvm::DenseMap< 7187 const ValueDecl *, 7188 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7189 DevPointersMap; 7190 7191 llvm::Value *getExprTypeSize(const Expr *E) const { 7192 QualType ExprTy = E->getType().getCanonicalType(); 7193 7194 // Reference types are ignored for mapping purposes. 7195 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7196 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7197 7198 // Given that an array section is considered a built-in type, we need to 7199 // do the calculation based on the length of the section instead of relying 7200 // on CGF.getTypeSize(E->getType()). 7201 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7202 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7203 OAE->getBase()->IgnoreParenImpCasts()) 7204 .getCanonicalType(); 7205 7206 // If there is no length associated with the expression and lower bound is 7207 // not specified too, that means we are using the whole length of the 7208 // base. 7209 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7210 !OAE->getLowerBound()) 7211 return CGF.getTypeSize(BaseTy); 7212 7213 llvm::Value *ElemSize; 7214 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7215 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7216 } else { 7217 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7218 assert(ATy && "Expecting array type if not a pointer type."); 7219 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7220 } 7221 7222 // If we don't have a length at this point, that is because we have an 7223 // array section with a single element. 
7224 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7225 return ElemSize; 7226 7227 if (const Expr *LenExpr = OAE->getLength()) { 7228 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7229 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7230 CGF.getContext().getSizeType(), 7231 LenExpr->getExprLoc()); 7232 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7233 } 7234 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7235 OAE->getLowerBound() && "expected array_section[lb:]."); 7236 // Size = sizetype - lb * elemtype; 7237 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7238 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7239 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7240 CGF.getContext().getSizeType(), 7241 OAE->getLowerBound()->getExprLoc()); 7242 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7243 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7244 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7245 LengthVal = CGF.Builder.CreateSelect( 7246 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7247 return LengthVal; 7248 } 7249 return CGF.getTypeSize(ExprTy); 7250 } 7251 7252 /// Return the corresponding bits for a given map clause modifier. Add 7253 /// a flag marking the map as a pointer if requested. Add a flag marking the 7254 /// map as the first one of a series of maps that relate to the same map 7255 /// expression. 7256 OpenMPOffloadMappingFlags getMapTypeBits( 7257 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7258 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7259 OpenMPOffloadMappingFlags Bits = 7260 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7261 switch (MapType) { 7262 case OMPC_MAP_alloc: 7263 case OMPC_MAP_release: 7264 // alloc and release is the default behavior in the runtime library, i.e. 
7265 // if we don't pass any bits alloc/release that is what the runtime is 7266 // going to do. Therefore, we don't need to signal anything for these two 7267 // type modifiers. 7268 break; 7269 case OMPC_MAP_to: 7270 Bits |= OMP_MAP_TO; 7271 break; 7272 case OMPC_MAP_from: 7273 Bits |= OMP_MAP_FROM; 7274 break; 7275 case OMPC_MAP_tofrom: 7276 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7277 break; 7278 case OMPC_MAP_delete: 7279 Bits |= OMP_MAP_DELETE; 7280 break; 7281 case OMPC_MAP_unknown: 7282 llvm_unreachable("Unexpected map type!"); 7283 } 7284 if (AddPtrFlag) 7285 Bits |= OMP_MAP_PTR_AND_OBJ; 7286 if (AddIsTargetParamFlag) 7287 Bits |= OMP_MAP_TARGET_PARAM; 7288 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7289 != MapModifiers.end()) 7290 Bits |= OMP_MAP_ALWAYS; 7291 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7292 != MapModifiers.end()) 7293 Bits |= OMP_MAP_CLOSE; 7294 return Bits; 7295 } 7296 7297 /// Return true if the provided expression is a final array section. A 7298 /// final array section, is one whose length can't be proved to be one. 7299 bool isFinalArraySectionExpression(const Expr *E) const { 7300 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7301 7302 // It is not an array section and therefore not a unity-size one. 7303 if (!OASE) 7304 return false; 7305 7306 // An array section with no colon always refer to a single element. 7307 if (OASE->getColonLoc().isInvalid()) 7308 return false; 7309 7310 const Expr *Length = OASE->getLength(); 7311 7312 // If we don't have a length we have to check if the array has size 1 7313 // for this dimension. Also, we should always expect a length if the 7314 // base type is pointer. 
7315 if (!Length) { 7316 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7317 OASE->getBase()->IgnoreParenImpCasts()) 7318 .getCanonicalType(); 7319 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7320 return ATy->getSize().getSExtValue() != 1; 7321 // If we don't have a constant dimension length, we have to consider 7322 // the current section as having any size, so it is not necessarily 7323 // unitary. If it happen to be unity size, that's user fault. 7324 return true; 7325 } 7326 7327 // Check if the length evaluates to 1. 7328 Expr::EvalResult Result; 7329 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7330 return true; // Can have more that size 1. 7331 7332 llvm::APSInt ConstLength = Result.Val.getInt(); 7333 return ConstLength.getSExtValue() != 1; 7334 } 7335 7336 /// Generate the base pointers, section pointers, sizes and map type 7337 /// bits for the provided map type, map modifier, and expression components. 7338 /// \a IsFirstComponent should be set to true if the provided set of 7339 /// components is the first associated with a capture. 7340 void generateInfoForComponentList( 7341 OpenMPMapClauseKind MapType, 7342 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7343 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7344 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7345 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7346 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7347 bool IsImplicit, 7348 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7349 OverlappedElements = llvm::None) const { 7350 // The following summarizes what has to be generated for each map and the 7351 // types below. The generated information is expressed in this order: 7352 // base pointer, section pointer, size, flags 7353 // (to add to the ones that come from the map type and modifier). 
7354 // 7355 // double d; 7356 // int i[100]; 7357 // float *p; 7358 // 7359 // struct S1 { 7360 // int i; 7361 // float f[50]; 7362 // } 7363 // struct S2 { 7364 // int i; 7365 // float f[50]; 7366 // S1 s; 7367 // double *p; 7368 // struct S2 *ps; 7369 // } 7370 // S2 s; 7371 // S2 *ps; 7372 // 7373 // map(d) 7374 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7375 // 7376 // map(i) 7377 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7378 // 7379 // map(i[1:23]) 7380 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7381 // 7382 // map(p) 7383 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7384 // 7385 // map(p[1:24]) 7386 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7387 // 7388 // map(s) 7389 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7390 // 7391 // map(s.i) 7392 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7393 // 7394 // map(s.s.f) 7395 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7396 // 7397 // map(s.p) 7398 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7399 // 7400 // map(to: s.p[:22]) 7401 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7402 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7403 // &(s.p), &(s.p[0]), 22*sizeof(double), 7404 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7405 // (*) alloc space for struct members, only this is a target parameter 7406 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7407 // optimizes this entry out, same in the examples below) 7408 // (***) map the pointee (map: to) 7409 // 7410 // map(s.ps) 7411 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7412 // 7413 // map(from: s.ps->s.i) 7414 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7415 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7416 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7417 // 7418 // map(to: s.ps->ps) 7419 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7420 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7421 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7422 // 7423 // map(s.ps->ps->ps) 7424 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7425 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7426 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7427 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7428 // 7429 // map(to: s.ps->ps->s.f[:22]) 7430 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7431 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7432 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7433 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7434 // 7435 // map(ps) 7436 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7437 // 7438 // map(ps->i) 7439 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7440 // 7441 // map(ps->s.f) 7442 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7443 // 7444 // map(from: ps->p) 7445 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7446 // 7447 // map(to: ps->p[:22]) 7448 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7449 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7450 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7451 // 7452 // map(ps->ps) 7453 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7454 // 7455 // map(from: ps->ps->s.i) 7456 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7457 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7458 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7459 // 7460 // map(from: ps->ps->ps) 7461 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7462 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7463 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7464 // 7465 // map(ps->ps->ps->ps) 7466 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7467 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7468 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7469 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7470 // 7471 // map(to: ps->ps->ps->s.f[:22]) 7472 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7473 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7474 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7475 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7476 // 7477 // map(to: s.f[:22]) map(from: s.p[:33]) 7478 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7479 // sizeof(double*) (**), TARGET_PARAM 7480 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7481 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7482 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7483 // (*) allocate contiguous space needed to fit all mapped members even if 7484 // we allocate space for members not mapped (in this example, 7485 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7486 // them as well because they fall between &s.f[0] and &s.p) 7487 // 7488 // map(from: s.f[:22]) map(to: ps->p[:33]) 7489 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7490 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7491 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7492 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7493 // (*) the struct this entry pertains to is the 2nd element in the list of 7494 // arguments, hence MEMBER_OF(2) 7495 // 7496 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7497 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7498 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7499 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7500 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7501 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7502 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7503 // (*) the struct this entry pertains to is the 4th element in the list 7504 // of arguments, hence MEMBER_OF(4) 7505 7506 // Track if the map information being generated is the first for a 
capture. 7507 bool IsCaptureFirstInfo = IsFirstComponentList; 7508 // When the variable is on a declare target link or in a to clause with 7509 // unified memory, a reference is needed to hold the host/device address 7510 // of the variable. 7511 bool RequiresReference = false; 7512 7513 // Scan the components from the base to the complete expression. 7514 auto CI = Components.rbegin(); 7515 auto CE = Components.rend(); 7516 auto I = CI; 7517 7518 // Track if the map information being generated is the first for a list of 7519 // components. 7520 bool IsExpressionFirstInfo = true; 7521 Address BP = Address::invalid(); 7522 const Expr *AssocExpr = I->getAssociatedExpression(); 7523 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7524 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7525 7526 if (isa<MemberExpr>(AssocExpr)) { 7527 // The base is the 'this' pointer. The content of the pointer is going 7528 // to be the base of the field being mapped. 7529 BP = CGF.LoadCXXThisAddress(); 7530 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7531 (OASE && 7532 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7533 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7534 } else { 7535 // The base is the reference to the variable. 7536 // BP = &Var. 7537 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7538 if (const auto *VD = 7539 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7540 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7541 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7542 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7543 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7544 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7545 RequiresReference = true; 7546 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7547 } 7548 } 7549 } 7550 7551 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7552 // the last component), the base has to be the pointer itself, not its 7553 // reference. References are ignored for mapping purposes. 7554 QualType Ty = 7555 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7556 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7557 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7558 7559 // We do not need to generate individual map information for the 7560 // pointer, it can be associated with the combined storage. 7561 ++I; 7562 } 7563 } 7564 7565 // Track whether a component of the list should be marked as MEMBER_OF some 7566 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7567 // in a component list should be marked as MEMBER_OF, all subsequent entries 7568 // do not belong to the base struct. E.g. 7569 // struct S2 s; 7570 // s.ps->ps->ps->f[:] 7571 // (1) (2) (3) (4) 7572 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7573 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7574 // is the pointee of ps(2) which is not member of struct s, so it should not 7575 // be marked as such (it is still PTR_AND_OBJ). 7576 // The variable is initialized to false so that PTR_AND_OBJ entries which 7577 // are not struct members are not considered (e.g. array of pointers to 7578 // data). 7579 bool ShouldBeMemberOf = false; 7580 7581 // Variable keeping track of whether or not we have encountered a component 7582 // in the component list which is a member expression. Useful when we have a 7583 // pointer or a final array section, in which case it is the previous 7584 // component in the list which tells us whether we have a member expression. 7585 // E.g. X.f[:] 7586 // While processing the final array section "[:]" it is "f" which tells us 7587 // whether we are dealing with a member of a declared struct. 
7588 const MemberExpr *EncounteredME = nullptr; 7589 7590 for (; I != CE; ++I) { 7591 // If the current component is member of a struct (parent struct) mark it. 7592 if (!EncounteredME) { 7593 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7594 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7595 // as MEMBER_OF the parent struct. 7596 if (EncounteredME) 7597 ShouldBeMemberOf = true; 7598 } 7599 7600 auto Next = std::next(I); 7601 7602 // We need to generate the addresses and sizes if this is the last 7603 // component, if the component is a pointer or if it is an array section 7604 // whose length can't be proved to be one. If this is a pointer, it 7605 // becomes the base address for the following components. 7606 7607 // A final array section, is one whose length can't be proved to be one. 7608 bool IsFinalArraySection = 7609 isFinalArraySectionExpression(I->getAssociatedExpression()); 7610 7611 // Get information on whether the element is a pointer. Have to do a 7612 // special treatment for array sections given that they are built-in 7613 // types. 7614 const auto *OASE = 7615 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7616 bool IsPointer = 7617 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7618 .getCanonicalType() 7619 ->isAnyPointerType()) || 7620 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7621 7622 if (Next == CE || IsPointer || IsFinalArraySection) { 7623 // If this is not the last component, we expect the pointer to be 7624 // associated with an array expression or member expression. 
7625 assert((Next == CE || 7626 isa<MemberExpr>(Next->getAssociatedExpression()) || 7627 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7628 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7629 "Unexpected expression"); 7630 7631 Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7632 .getAddress(CGF); 7633 7634 // If this component is a pointer inside the base struct then we don't 7635 // need to create any entry for it - it will be combined with the object 7636 // it is pointing to into a single PTR_AND_OBJ entry. 7637 bool IsMemberPointer = 7638 IsPointer && EncounteredME && 7639 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7640 EncounteredME); 7641 if (!OverlappedElements.empty()) { 7642 // Handle base element with the info for overlapped elements. 7643 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7644 assert(Next == CE && 7645 "Expected last element for the overlapped elements."); 7646 assert(!IsPointer && 7647 "Unexpected base element with the pointer type."); 7648 // Mark the whole struct as the struct that requires allocation on the 7649 // device. 7650 PartialStruct.LowestElem = {0, LB}; 7651 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7652 I->getAssociatedExpression()->getType()); 7653 Address HB = CGF.Builder.CreateConstGEP( 7654 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7655 CGF.VoidPtrTy), 7656 TypeSize.getQuantity() - 1); 7657 PartialStruct.HighestElem = { 7658 std::numeric_limits<decltype( 7659 PartialStruct.HighestElem.first)>::max(), 7660 HB}; 7661 PartialStruct.Base = BP; 7662 // Emit data for non-overlapped data. 7663 OpenMPOffloadMappingFlags Flags = 7664 OMP_MAP_MEMBER_OF | 7665 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7666 /*AddPtrFlag=*/false, 7667 /*AddIsTargetParamFlag=*/false); 7668 LB = BP; 7669 llvm::Value *Size = nullptr; 7670 // Do bitcopy of all non-overlapped structure elements. 
7671 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7672 Component : OverlappedElements) { 7673 Address ComponentLB = Address::invalid(); 7674 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7675 Component) { 7676 if (MC.getAssociatedDeclaration()) { 7677 ComponentLB = 7678 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7679 .getAddress(CGF); 7680 Size = CGF.Builder.CreatePtrDiff( 7681 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7682 CGF.EmitCastToVoidPtr(LB.getPointer())); 7683 break; 7684 } 7685 } 7686 BasePointers.push_back(BP.getPointer()); 7687 Pointers.push_back(LB.getPointer()); 7688 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7689 /*isSigned=*/true)); 7690 Types.push_back(Flags); 7691 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7692 } 7693 BasePointers.push_back(BP.getPointer()); 7694 Pointers.push_back(LB.getPointer()); 7695 Size = CGF.Builder.CreatePtrDiff( 7696 CGF.EmitCastToVoidPtr( 7697 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7698 CGF.EmitCastToVoidPtr(LB.getPointer())); 7699 Sizes.push_back( 7700 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7701 Types.push_back(Flags); 7702 break; 7703 } 7704 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7705 if (!IsMemberPointer) { 7706 BasePointers.push_back(BP.getPointer()); 7707 Pointers.push_back(LB.getPointer()); 7708 Sizes.push_back( 7709 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7710 7711 // We need to add a pointer flag for each map that comes from the 7712 // same expression except for the first one. We also need to signal 7713 // this map is the first one that relates with the current capture 7714 // (there is a set of entries for each capture). 
7715 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7716 MapType, MapModifiers, IsImplicit, 7717 !IsExpressionFirstInfo || RequiresReference, 7718 IsCaptureFirstInfo && !RequiresReference); 7719 7720 if (!IsExpressionFirstInfo) { 7721 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7722 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7723 if (IsPointer) 7724 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7725 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7726 7727 if (ShouldBeMemberOf) { 7728 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7729 // should be later updated with the correct value of MEMBER_OF. 7730 Flags |= OMP_MAP_MEMBER_OF; 7731 // From now on, all subsequent PTR_AND_OBJ entries should not be 7732 // marked as MEMBER_OF. 7733 ShouldBeMemberOf = false; 7734 } 7735 } 7736 7737 Types.push_back(Flags); 7738 } 7739 7740 // If we have encountered a member expression so far, keep track of the 7741 // mapped member. If the parent is "*this", then the value declaration 7742 // is nullptr. 7743 if (EncounteredME) { 7744 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7745 unsigned FieldIndex = FD->getFieldIndex(); 7746 7747 // Update info about the lowest and highest elements for this struct 7748 if (!PartialStruct.Base.isValid()) { 7749 PartialStruct.LowestElem = {FieldIndex, LB}; 7750 PartialStruct.HighestElem = {FieldIndex, LB}; 7751 PartialStruct.Base = BP; 7752 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7753 PartialStruct.LowestElem = {FieldIndex, LB}; 7754 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7755 PartialStruct.HighestElem = {FieldIndex, LB}; 7756 } 7757 } 7758 7759 // If we have a final array section, we are done with this expression. 7760 if (IsFinalArraySection) 7761 break; 7762 7763 // The pointer becomes the base for the next element. 
7764 if (Next != CE) 7765 BP = LB; 7766 7767 IsExpressionFirstInfo = false; 7768 IsCaptureFirstInfo = false; 7769 } 7770 } 7771 } 7772 7773 /// Return the adjusted map modifiers if the declaration a capture refers to 7774 /// appears in a first-private clause. This is expected to be used only with 7775 /// directives that start with 'target'. 7776 MappableExprsHandler::OpenMPOffloadMappingFlags 7777 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7778 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7779 7780 // A first private variable captured by reference will use only the 7781 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7782 // declaration is known as first-private in this handler. 7783 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7784 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7785 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7786 return MappableExprsHandler::OMP_MAP_ALWAYS | 7787 MappableExprsHandler::OMP_MAP_TO; 7788 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7789 return MappableExprsHandler::OMP_MAP_TO | 7790 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7791 return MappableExprsHandler::OMP_MAP_PRIVATE | 7792 MappableExprsHandler::OMP_MAP_TO; 7793 } 7794 return MappableExprsHandler::OMP_MAP_TO | 7795 MappableExprsHandler::OMP_MAP_FROM; 7796 } 7797 7798 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7799 // Rotate by getFlagMemberOffset() bits. 7800 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7801 << getFlagMemberOffset()); 7802 } 7803 7804 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7805 OpenMPOffloadMappingFlags MemberOfFlag) { 7806 // If the entry is PTR_AND_OBJ but has not been marked with the special 7807 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7808 // marked as MEMBER_OF. 
7809 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7810 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7811 return; 7812 7813 // Reset the placeholder value to prepare the flag for the assignment of the 7814 // proper MEMBER_OF value. 7815 Flags &= ~OMP_MAP_MEMBER_OF; 7816 Flags |= MemberOfFlag; 7817 } 7818 7819 void getPlainLayout(const CXXRecordDecl *RD, 7820 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7821 bool AsBase) const { 7822 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7823 7824 llvm::StructType *St = 7825 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7826 7827 unsigned NumElements = St->getNumElements(); 7828 llvm::SmallVector< 7829 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7830 RecordLayout(NumElements); 7831 7832 // Fill bases. 7833 for (const auto &I : RD->bases()) { 7834 if (I.isVirtual()) 7835 continue; 7836 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7837 // Ignore empty bases. 7838 if (Base->isEmpty() || CGF.getContext() 7839 .getASTRecordLayout(Base) 7840 .getNonVirtualSize() 7841 .isZero()) 7842 continue; 7843 7844 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7845 RecordLayout[FieldIndex] = Base; 7846 } 7847 // Fill in virtual bases. 7848 for (const auto &I : RD->vbases()) { 7849 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7850 // Ignore empty bases. 7851 if (Base->isEmpty()) 7852 continue; 7853 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7854 if (RecordLayout[FieldIndex]) 7855 continue; 7856 RecordLayout[FieldIndex] = Base; 7857 } 7858 // Fill in all the fields. 7859 assert(!RD->isUnion() && "Unexpected union."); 7860 for (const auto *Field : RD->fields()) { 7861 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7862 // will fill in later.) 
7863 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7864 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7865 RecordLayout[FieldIndex] = Field; 7866 } 7867 } 7868 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7869 &Data : RecordLayout) { 7870 if (Data.isNull()) 7871 continue; 7872 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7873 getPlainLayout(Base, Layout, /*AsBase=*/true); 7874 else 7875 Layout.push_back(Data.get<const FieldDecl *>()); 7876 } 7877 } 7878 7879 public: 7880 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7881 : CurDir(&Dir), CGF(CGF) { 7882 // Extract firstprivate clause information. 7883 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7884 for (const auto *D : C->varlists()) 7885 FirstPrivateDecls.try_emplace( 7886 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7887 // Extract device pointer clause information. 7888 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7889 for (auto L : C->component_lists()) 7890 DevPointersMap[L.first].push_back(L.second); 7891 } 7892 7893 /// Constructor for the declare mapper directive. 7894 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7895 : CurDir(&Dir), CGF(CGF) {} 7896 7897 /// Generate code for the combined entry if we have a partially mapped struct 7898 /// and take care of the mapping flags of the arguments corresponding to 7899 /// individual struct members. 
7900 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7901 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7902 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7903 const StructRangeInfoTy &PartialStruct) const { 7904 // Base is the base of the struct 7905 BasePointers.push_back(PartialStruct.Base.getPointer()); 7906 // Pointer is the address of the lowest element 7907 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7908 Pointers.push_back(LB); 7909 // Size is (addr of {highest+1} element) - (addr of lowest element) 7910 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7911 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7912 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7913 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7914 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7915 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7916 /*isSigned=*/false); 7917 Sizes.push_back(Size); 7918 // Map type is always TARGET_PARAM 7919 Types.push_back(OMP_MAP_TARGET_PARAM); 7920 // Remove TARGET_PARAM flag from the first element 7921 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7922 7923 // All other current entries will be MEMBER_OF the combined entry 7924 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7925 // 0xFFFF in the MEMBER_OF field). 7926 OpenMPOffloadMappingFlags MemberOfFlag = 7927 getMemberOfFlag(BasePointers.size() - 1); 7928 for (auto &M : CurTypes) 7929 setCorrectMemberOfFlag(M, MemberOfFlag); 7930 } 7931 7932 /// Generate all the base pointers, section pointers, sizes and map 7933 /// types for the extracted mappable expressions. Also, for each item that 7934 /// relates with a device pointer, a pair of the relevant declaration and 7935 /// index where it occurs is appended to the device pointers info array. 
7936 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7937 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7938 MapFlagsArrayTy &Types) const { 7939 // We have to process the component lists that relate with the same 7940 // declaration in a single chunk so that we can generate the map flags 7941 // correctly. Therefore, we organize all lists in a map. 7942 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7943 7944 // Helper function to fill the information map for the different supported 7945 // clauses. 7946 auto &&InfoGen = [&Info]( 7947 const ValueDecl *D, 7948 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7949 OpenMPMapClauseKind MapType, 7950 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7951 bool ReturnDevicePointer, bool IsImplicit) { 7952 const ValueDecl *VD = 7953 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7954 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7955 IsImplicit); 7956 }; 7957 7958 assert(CurDir.is<const OMPExecutableDirective *>() && 7959 "Expect a executable directive"); 7960 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 7961 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 7962 for (const auto L : C->component_lists()) { 7963 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7964 /*ReturnDevicePointer=*/false, C->isImplicit()); 7965 } 7966 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 7967 for (const auto L : C->component_lists()) { 7968 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7969 /*ReturnDevicePointer=*/false, C->isImplicit()); 7970 } 7971 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 7972 for (const auto L : C->component_lists()) { 7973 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7974 /*ReturnDevicePointer=*/false, C->isImplicit()); 7975 } 7976 7977 // Look at the use_device_ptr clause information and mark the existing map 7978 // 
entries as such. If there is no map information for an entry in the 7979 // use_device_ptr list, we create one with map type 'alloc' and zero size 7980 // section. It is the user fault if that was not mapped before. If there is 7981 // no map information and the pointer is a struct member, then we defer the 7982 // emission of that entry until the whole struct has been processed. 7983 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 7984 DeferredInfo; 7985 7986 for (const auto *C : 7987 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 7988 for (const auto L : C->component_lists()) { 7989 assert(!L.second.empty() && "Not expecting empty list of components!"); 7990 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 7991 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7992 const Expr *IE = L.second.back().getAssociatedExpression(); 7993 // If the first component is a member expression, we have to look into 7994 // 'this', which maps to null in the map of map information. Otherwise 7995 // look directly for the information. 7996 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 7997 7998 // We potentially have map information for this declaration already. 7999 // Look for the first set of components that refer to it. 8000 if (It != Info.end()) { 8001 auto CI = std::find_if( 8002 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 8003 return MI.Components.back().getAssociatedDeclaration() == VD; 8004 }); 8005 // If we found a map entry, signal that the pointer has to be returned 8006 // and move on to the next declaration. 8007 if (CI != It->second.end()) { 8008 CI->ReturnDevicePointer = true; 8009 continue; 8010 } 8011 } 8012 8013 // We didn't find any match in our map information - generate a zero 8014 // size array section - if the pointer is a struct member we defer this 8015 // action until the whole struct has been processed. 
8016 if (isa<MemberExpr>(IE)) { 8017 // Insert the pointer into Info to be processed by 8018 // generateInfoForComponentList. Because it is a member pointer 8019 // without a pointee, no entry will be generated for it, therefore 8020 // we need to generate one after the whole struct has been processed. 8021 // Nonetheless, generateInfoForComponentList must be called to take 8022 // the pointer into account for the calculation of the range of the 8023 // partial struct. 8024 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8025 /*ReturnDevicePointer=*/false, C->isImplicit()); 8026 DeferredInfo[nullptr].emplace_back(IE, VD); 8027 } else { 8028 llvm::Value *Ptr = 8029 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8030 BasePointers.emplace_back(Ptr, VD); 8031 Pointers.push_back(Ptr); 8032 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8033 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8034 } 8035 } 8036 } 8037 8038 for (const auto &M : Info) { 8039 // We need to know when we generate information for the first component 8040 // associated with a capture, because the mapping flags depend on it. 8041 bool IsFirstComponentList = true; 8042 8043 // Temporary versions of arrays 8044 MapBaseValuesArrayTy CurBasePointers; 8045 MapValuesArrayTy CurPointers; 8046 MapValuesArrayTy CurSizes; 8047 MapFlagsArrayTy CurTypes; 8048 StructRangeInfoTy PartialStruct; 8049 8050 for (const MapInfo &L : M.second) { 8051 assert(!L.Components.empty() && 8052 "Not expecting declaration with no component lists."); 8053 8054 // Remember the current base pointer index. 8055 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8056 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8057 CurBasePointers, CurPointers, CurSizes, 8058 CurTypes, PartialStruct, 8059 IsFirstComponentList, L.IsImplicit); 8060 8061 // If this entry relates with a device pointer, set the relevant 8062 // declaration and add the 'return pointer' flag. 
8063 if (L.ReturnDevicePointer) { 8064 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8065 "Unexpected number of mapped base pointers."); 8066 8067 const ValueDecl *RelevantVD = 8068 L.Components.back().getAssociatedDeclaration(); 8069 assert(RelevantVD && 8070 "No relevant declaration related with device pointer??"); 8071 8072 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8073 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8074 } 8075 IsFirstComponentList = false; 8076 } 8077 8078 // Append any pending zero-length pointers which are struct members and 8079 // used with use_device_ptr. 8080 auto CI = DeferredInfo.find(M.first); 8081 if (CI != DeferredInfo.end()) { 8082 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8083 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8084 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8085 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8086 CurBasePointers.emplace_back(BasePtr, L.VD); 8087 CurPointers.push_back(Ptr); 8088 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8089 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8090 // value MEMBER_OF=FFFF so that the entry is later updated with the 8091 // correct value of MEMBER_OF. 8092 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8093 OMP_MAP_MEMBER_OF); 8094 } 8095 } 8096 8097 // If there is an entry in PartialStruct it means we have a struct with 8098 // individual members mapped. Emit an extra combined entry. 8099 if (PartialStruct.Base.isValid()) 8100 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8101 PartialStruct); 8102 8103 // We need to append the results of this capture to what we already have. 
8104 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8105 Pointers.append(CurPointers.begin(), CurPointers.end()); 8106 Sizes.append(CurSizes.begin(), CurSizes.end()); 8107 Types.append(CurTypes.begin(), CurTypes.end()); 8108 } 8109 } 8110 8111 /// Generate all the base pointers, section pointers, sizes and map types for 8112 /// the extracted map clauses of user-defined mapper. 8113 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8114 MapValuesArrayTy &Pointers, 8115 MapValuesArrayTy &Sizes, 8116 MapFlagsArrayTy &Types) const { 8117 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8118 "Expect a declare mapper directive"); 8119 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8120 // We have to process the component lists that relate with the same 8121 // declaration in a single chunk so that we can generate the map flags 8122 // correctly. Therefore, we organize all lists in a map. 8123 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8124 8125 // Helper function to fill the information map for the different supported 8126 // clauses. 8127 auto &&InfoGen = [&Info]( 8128 const ValueDecl *D, 8129 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8130 OpenMPMapClauseKind MapType, 8131 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8132 bool ReturnDevicePointer, bool IsImplicit) { 8133 const ValueDecl *VD = 8134 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8135 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8136 IsImplicit); 8137 }; 8138 8139 for (const auto *C : CurMapperDir->clauselists()) { 8140 const auto *MC = cast<OMPMapClause>(C); 8141 for (const auto L : MC->component_lists()) { 8142 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8143 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8144 } 8145 } 8146 8147 for (const auto &M : Info) { 8148 // We need to know when we generate information for the first component 8149 // associated with a capture, because the mapping flags depend on it. 8150 bool IsFirstComponentList = true; 8151 8152 // Temporary versions of arrays 8153 MapBaseValuesArrayTy CurBasePointers; 8154 MapValuesArrayTy CurPointers; 8155 MapValuesArrayTy CurSizes; 8156 MapFlagsArrayTy CurTypes; 8157 StructRangeInfoTy PartialStruct; 8158 8159 for (const MapInfo &L : M.second) { 8160 assert(!L.Components.empty() && 8161 "Not expecting declaration with no component lists."); 8162 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8163 CurBasePointers, CurPointers, CurSizes, 8164 CurTypes, PartialStruct, 8165 IsFirstComponentList, L.IsImplicit); 8166 IsFirstComponentList = false; 8167 } 8168 8169 // If there is an entry in PartialStruct it means we have a struct with 8170 // individual members mapped. Emit an extra combined entry. 8171 if (PartialStruct.Base.isValid()) 8172 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8173 PartialStruct); 8174 8175 // We need to append the results of this capture to what we already have. 8176 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8177 Pointers.append(CurPointers.begin(), CurPointers.end()); 8178 Sizes.append(CurSizes.begin(), CurSizes.end()); 8179 Types.append(CurTypes.begin(), CurTypes.end()); 8180 } 8181 } 8182 8183 /// Emit capture info for lambdas for variables captured by reference. 
8184 void generateInfoForLambdaCaptures( 8185 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8186 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8187 MapFlagsArrayTy &Types, 8188 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8189 const auto *RD = VD->getType() 8190 .getCanonicalType() 8191 .getNonReferenceType() 8192 ->getAsCXXRecordDecl(); 8193 if (!RD || !RD->isLambda()) 8194 return; 8195 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8196 LValue VDLVal = CGF.MakeAddrLValue( 8197 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8198 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8199 FieldDecl *ThisCapture = nullptr; 8200 RD->getCaptureFields(Captures, ThisCapture); 8201 if (ThisCapture) { 8202 LValue ThisLVal = 8203 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8204 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8205 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8206 VDLVal.getPointer(CGF)); 8207 BasePointers.push_back(ThisLVal.getPointer(CGF)); 8208 Pointers.push_back(ThisLValVal.getPointer(CGF)); 8209 Sizes.push_back( 8210 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8211 CGF.Int64Ty, /*isSigned=*/true)); 8212 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8213 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8214 } 8215 for (const LambdaCapture &LC : RD->captures()) { 8216 if (!LC.capturesVariable()) 8217 continue; 8218 const VarDecl *VD = LC.getCapturedVar(); 8219 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8220 continue; 8221 auto It = Captures.find(VD); 8222 assert(It != Captures.end() && "Found lambda capture without field."); 8223 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8224 if (LC.getCaptureKind() == LCK_ByRef) { 8225 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8226 
LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8227 VDLVal.getPointer(CGF)); 8228 BasePointers.push_back(VarLVal.getPointer(CGF)); 8229 Pointers.push_back(VarLValVal.getPointer(CGF)); 8230 Sizes.push_back(CGF.Builder.CreateIntCast( 8231 CGF.getTypeSize( 8232 VD->getType().getCanonicalType().getNonReferenceType()), 8233 CGF.Int64Ty, /*isSigned=*/true)); 8234 } else { 8235 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8236 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8237 VDLVal.getPointer(CGF)); 8238 BasePointers.push_back(VarLVal.getPointer(CGF)); 8239 Pointers.push_back(VarRVal.getScalarVal()); 8240 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8241 } 8242 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8243 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8244 } 8245 } 8246 8247 /// Set correct indices for lambdas captures. 8248 void adjustMemberOfForLambdaCaptures( 8249 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8250 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8251 MapFlagsArrayTy &Types) const { 8252 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8253 // Set correct member_of idx for all implicit lambda captures. 8254 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8255 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8256 continue; 8257 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8258 assert(BasePtr && "Unable to find base lambda address."); 8259 int TgtIdx = -1; 8260 for (unsigned J = I; J > 0; --J) { 8261 unsigned Idx = J - 1; 8262 if (Pointers[Idx] != BasePtr) 8263 continue; 8264 TgtIdx = Idx; 8265 break; 8266 } 8267 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8268 // All other current entries will be MEMBER_OF the combined entry 8269 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8270 // 0xFFFF in the MEMBER_OF field). 
8271 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8272 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8273 } 8274 } 8275 8276 /// Generate the base pointers, section pointers, sizes and map types 8277 /// associated to a given capture. 8278 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8279 llvm::Value *Arg, 8280 MapBaseValuesArrayTy &BasePointers, 8281 MapValuesArrayTy &Pointers, 8282 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8283 StructRangeInfoTy &PartialStruct) const { 8284 assert(!Cap->capturesVariableArrayType() && 8285 "Not expecting to generate map info for a variable array type!"); 8286 8287 // We need to know when we generating information for the first component 8288 const ValueDecl *VD = Cap->capturesThis() 8289 ? nullptr 8290 : Cap->getCapturedVar()->getCanonicalDecl(); 8291 8292 // If this declaration appears in a is_device_ptr clause we just have to 8293 // pass the pointer by value. If it is a reference to a declaration, we just 8294 // pass its value. 
8295 if (DevPointersMap.count(VD)) { 8296 BasePointers.emplace_back(Arg, VD); 8297 Pointers.push_back(Arg); 8298 Sizes.push_back( 8299 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8300 CGF.Int64Ty, /*isSigned=*/true)); 8301 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8302 return; 8303 } 8304 8305 using MapData = 8306 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8307 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8308 SmallVector<MapData, 4> DeclComponentLists; 8309 assert(CurDir.is<const OMPExecutableDirective *>() && 8310 "Expect a executable directive"); 8311 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8312 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8313 for (const auto L : C->decl_component_lists(VD)) { 8314 assert(L.first == VD && 8315 "We got information for the wrong declaration??"); 8316 assert(!L.second.empty() && 8317 "Not expecting declaration with no component lists."); 8318 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8319 C->getMapTypeModifiers(), 8320 C->isImplicit()); 8321 } 8322 } 8323 8324 // Find overlapping elements (including the offset from the base element). 
8325 llvm::SmallDenseMap< 8326 const MapData *, 8327 llvm::SmallVector< 8328 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8329 4> 8330 OverlappedData; 8331 size_t Count = 0; 8332 for (const MapData &L : DeclComponentLists) { 8333 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8334 OpenMPMapClauseKind MapType; 8335 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8336 bool IsImplicit; 8337 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8338 ++Count; 8339 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8340 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8341 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8342 auto CI = Components.rbegin(); 8343 auto CE = Components.rend(); 8344 auto SI = Components1.rbegin(); 8345 auto SE = Components1.rend(); 8346 for (; CI != CE && SI != SE; ++CI, ++SI) { 8347 if (CI->getAssociatedExpression()->getStmtClass() != 8348 SI->getAssociatedExpression()->getStmtClass()) 8349 break; 8350 // Are we dealing with different variables/fields? 8351 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8352 break; 8353 } 8354 // Found overlapping if, at least for one component, reached the head of 8355 // the components list. 8356 if (CI == CE || SI == SE) { 8357 assert((CI != CE || SI != SE) && 8358 "Unexpected full match of the mapping components."); 8359 const MapData &BaseData = CI == CE ? L : L1; 8360 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8361 SI == SE ? Components : Components1; 8362 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8363 OverlappedElements.getSecond().push_back(SubData); 8364 } 8365 } 8366 } 8367 // Sort the overlapped elements for each item. 
8368 llvm::SmallVector<const FieldDecl *, 4> Layout; 8369 if (!OverlappedData.empty()) { 8370 if (const auto *CRD = 8371 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8372 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8373 else { 8374 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8375 Layout.append(RD->field_begin(), RD->field_end()); 8376 } 8377 } 8378 for (auto &Pair : OverlappedData) { 8379 llvm::sort( 8380 Pair.getSecond(), 8381 [&Layout]( 8382 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8383 OMPClauseMappableExprCommon::MappableExprComponentListRef 8384 Second) { 8385 auto CI = First.rbegin(); 8386 auto CE = First.rend(); 8387 auto SI = Second.rbegin(); 8388 auto SE = Second.rend(); 8389 for (; CI != CE && SI != SE; ++CI, ++SI) { 8390 if (CI->getAssociatedExpression()->getStmtClass() != 8391 SI->getAssociatedExpression()->getStmtClass()) 8392 break; 8393 // Are we dealing with different variables/fields? 8394 if (CI->getAssociatedDeclaration() != 8395 SI->getAssociatedDeclaration()) 8396 break; 8397 } 8398 8399 // Lists contain the same elements. 8400 if (CI == CE && SI == SE) 8401 return false; 8402 8403 // List with less elements is less than list with more elements. 8404 if (CI == CE || SI == SE) 8405 return CI == CE; 8406 8407 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8408 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8409 if (FD1->getParent() == FD2->getParent()) 8410 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8411 const auto It = 8412 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8413 return FD == FD1 || FD == FD2; 8414 }); 8415 return *It == FD1; 8416 }); 8417 } 8418 8419 // Associated with a capture, because the mapping flags depend on it. 8420 // Go through all of the elements with the overlapped elements. 
8421 for (const auto &Pair : OverlappedData) { 8422 const MapData &L = *Pair.getFirst(); 8423 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8424 OpenMPMapClauseKind MapType; 8425 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8426 bool IsImplicit; 8427 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8428 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8429 OverlappedComponents = Pair.getSecond(); 8430 bool IsFirstComponentList = true; 8431 generateInfoForComponentList(MapType, MapModifiers, Components, 8432 BasePointers, Pointers, Sizes, Types, 8433 PartialStruct, IsFirstComponentList, 8434 IsImplicit, OverlappedComponents); 8435 } 8436 // Go through other elements without overlapped elements. 8437 bool IsFirstComponentList = OverlappedData.empty(); 8438 for (const MapData &L : DeclComponentLists) { 8439 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8440 OpenMPMapClauseKind MapType; 8441 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8442 bool IsImplicit; 8443 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8444 auto It = OverlappedData.find(&L); 8445 if (It == OverlappedData.end()) 8446 generateInfoForComponentList(MapType, MapModifiers, Components, 8447 BasePointers, Pointers, Sizes, Types, 8448 PartialStruct, IsFirstComponentList, 8449 IsImplicit); 8450 IsFirstComponentList = false; 8451 } 8452 } 8453 8454 /// Generate the base pointers, section pointers, sizes and map types 8455 /// associated with the declare target link variables. 
8456 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8457 MapValuesArrayTy &Pointers, 8458 MapValuesArrayTy &Sizes, 8459 MapFlagsArrayTy &Types) const { 8460 assert(CurDir.is<const OMPExecutableDirective *>() && 8461 "Expect a executable directive"); 8462 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8463 // Map other list items in the map clause which are not captured variables 8464 // but "declare target link" global variables. 8465 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8466 for (const auto L : C->component_lists()) { 8467 if (!L.first) 8468 continue; 8469 const auto *VD = dyn_cast<VarDecl>(L.first); 8470 if (!VD) 8471 continue; 8472 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8473 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8474 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8475 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8476 continue; 8477 StructRangeInfoTy PartialStruct; 8478 generateInfoForComponentList( 8479 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8480 Pointers, Sizes, Types, PartialStruct, 8481 /*IsFirstComponentList=*/true, C->isImplicit()); 8482 assert(!PartialStruct.Base.isValid() && 8483 "No partial structs for declare target link expected."); 8484 } 8485 } 8486 } 8487 8488 /// Generate the default map information for a given capture \a CI, 8489 /// record field declaration \a RI and captured value \a CV. 8490 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8491 const FieldDecl &RI, llvm::Value *CV, 8492 MapBaseValuesArrayTy &CurBasePointers, 8493 MapValuesArrayTy &CurPointers, 8494 MapValuesArrayTy &CurSizes, 8495 MapFlagsArrayTy &CurMapTypes) const { 8496 bool IsImplicit = true; 8497 // Do the default mapping. 
8498 if (CI.capturesThis()) { 8499 CurBasePointers.push_back(CV); 8500 CurPointers.push_back(CV); 8501 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8502 CurSizes.push_back( 8503 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8504 CGF.Int64Ty, /*isSigned=*/true)); 8505 // Default map type. 8506 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8507 } else if (CI.capturesVariableByCopy()) { 8508 CurBasePointers.push_back(CV); 8509 CurPointers.push_back(CV); 8510 if (!RI.getType()->isAnyPointerType()) { 8511 // We have to signal to the runtime captures passed by value that are 8512 // not pointers. 8513 CurMapTypes.push_back(OMP_MAP_LITERAL); 8514 CurSizes.push_back(CGF.Builder.CreateIntCast( 8515 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8516 } else { 8517 // Pointers are implicitly mapped with a zero size and no flags 8518 // (other than first map that is added for all implicit maps). 8519 CurMapTypes.push_back(OMP_MAP_NONE); 8520 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8521 } 8522 const VarDecl *VD = CI.getCapturedVar(); 8523 auto I = FirstPrivateDecls.find(VD); 8524 if (I != FirstPrivateDecls.end()) 8525 IsImplicit = I->getSecond(); 8526 } else { 8527 assert(CI.capturesVariable() && "Expected captured reference."); 8528 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8529 QualType ElementType = PtrTy->getPointeeType(); 8530 CurSizes.push_back(CGF.Builder.CreateIntCast( 8531 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8532 // The default map type for a scalar/complex type is 'to' because by 8533 // default the value doesn't have to be retrieved. For an aggregate 8534 // type, the default is 'tofrom'. 
8535 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8536 const VarDecl *VD = CI.getCapturedVar(); 8537 auto I = FirstPrivateDecls.find(VD); 8538 if (I != FirstPrivateDecls.end() && 8539 VD->getType().isConstant(CGF.getContext())) { 8540 llvm::Constant *Addr = 8541 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8542 // Copy the value of the original variable to the new global copy. 8543 CGF.Builder.CreateMemCpy( 8544 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8545 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8546 CurSizes.back(), /*IsVolatile=*/false); 8547 // Use new global variable as the base pointers. 8548 CurBasePointers.push_back(Addr); 8549 CurPointers.push_back(Addr); 8550 } else { 8551 CurBasePointers.push_back(CV); 8552 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8553 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8554 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8555 AlignmentSource::Decl)); 8556 CurPointers.push_back(PtrAddr.getPointer()); 8557 } else { 8558 CurPointers.push_back(CV); 8559 } 8560 } 8561 if (I != FirstPrivateDecls.end()) 8562 IsImplicit = I->getSecond(); 8563 } 8564 // Every default map produces a single argument which is a target parameter. 8565 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8566 8567 // Add flag stating this is an implicit map. 8568 if (IsImplicit) 8569 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8570 } 8571 }; 8572 } // anonymous namespace 8573 8574 /// Emit the arrays used to pass the captures and map information to the 8575 /// offloading runtime library. If there is no map or capture information, 8576 /// return nullptr by reference. 
8577 static void 8578 emitOffloadingArrays(CodeGenFunction &CGF, 8579 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8580 MappableExprsHandler::MapValuesArrayTy &Pointers, 8581 MappableExprsHandler::MapValuesArrayTy &Sizes, 8582 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8583 CGOpenMPRuntime::TargetDataInfo &Info) { 8584 CodeGenModule &CGM = CGF.CGM; 8585 ASTContext &Ctx = CGF.getContext(); 8586 8587 // Reset the array information. 8588 Info.clearArrayInfo(); 8589 Info.NumberOfPtrs = BasePointers.size(); 8590 8591 if (Info.NumberOfPtrs) { 8592 // Detect if we have any capture size requiring runtime evaluation of the 8593 // size so that a constant array could be eventually used. 8594 bool hasRuntimeEvaluationCaptureSize = false; 8595 for (llvm::Value *S : Sizes) 8596 if (!isa<llvm::Constant>(S)) { 8597 hasRuntimeEvaluationCaptureSize = true; 8598 break; 8599 } 8600 8601 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8602 QualType PointerArrayType = Ctx.getConstantArrayType( 8603 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8604 /*IndexTypeQuals=*/0); 8605 8606 Info.BasePointersArray = 8607 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8608 Info.PointersArray = 8609 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8610 8611 // If we don't have any VLA types or other types that require runtime 8612 // evaluation, we can use a constant array for the map sizes, otherwise we 8613 // need to fill up the arrays as we do for the pointers. 
8614 QualType Int64Ty = 8615 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8616 if (hasRuntimeEvaluationCaptureSize) { 8617 QualType SizeArrayType = Ctx.getConstantArrayType( 8618 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8619 /*IndexTypeQuals=*/0); 8620 Info.SizesArray = 8621 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8622 } else { 8623 // We expect all the sizes to be constant, so we collect them to create 8624 // a constant array. 8625 SmallVector<llvm::Constant *, 16> ConstSizes; 8626 for (llvm::Value *S : Sizes) 8627 ConstSizes.push_back(cast<llvm::Constant>(S)); 8628 8629 auto *SizesArrayInit = llvm::ConstantArray::get( 8630 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8631 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8632 auto *SizesArrayGbl = new llvm::GlobalVariable( 8633 CGM.getModule(), SizesArrayInit->getType(), 8634 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8635 SizesArrayInit, Name); 8636 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8637 Info.SizesArray = SizesArrayGbl; 8638 } 8639 8640 // The map types are always constant so we don't need to generate code to 8641 // fill arrays. Instead, we create an array constant. 
8642 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8643 llvm::copy(MapTypes, Mapping.begin()); 8644 llvm::Constant *MapTypesArrayInit = 8645 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8646 std::string MaptypesName = 8647 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8648 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8649 CGM.getModule(), MapTypesArrayInit->getType(), 8650 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8651 MapTypesArrayInit, MaptypesName); 8652 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8653 Info.MapTypesArray = MapTypesArrayGbl; 8654 8655 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8656 llvm::Value *BPVal = *BasePointers[I]; 8657 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8658 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8659 Info.BasePointersArray, 0, I); 8660 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8661 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8662 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8663 CGF.Builder.CreateStore(BPVal, BPAddr); 8664 8665 if (Info.requiresDevicePointerInfo()) 8666 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8667 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8668 8669 llvm::Value *PVal = Pointers[I]; 8670 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8671 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8672 Info.PointersArray, 0, I); 8673 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8674 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8675 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8676 CGF.Builder.CreateStore(PVal, PAddr); 8677 8678 if (hasRuntimeEvaluationCaptureSize) { 8679 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8680 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8681 Info.SizesArray, 8682 /*Idx0=*/0, 8683 /*Idx1=*/I); 8684 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8685 CGF.Builder.CreateStore( 8686 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8687 SAddr); 8688 } 8689 } 8690 } 8691 } 8692 8693 /// Emit the arguments to be passed to the runtime library based on the 8694 /// arrays of pointers, sizes and map types. 8695 static void emitOffloadingArraysArgument( 8696 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8697 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8698 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8699 CodeGenModule &CGM = CGF.CGM; 8700 if (Info.NumberOfPtrs) { 8701 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8702 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8703 Info.BasePointersArray, 8704 /*Idx0=*/0, /*Idx1=*/0); 8705 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8706 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8707 Info.PointersArray, 8708 /*Idx0=*/0, 8709 /*Idx1=*/0); 8710 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8711 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8712 /*Idx0=*/0, /*Idx1=*/0); 8713 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8714 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8715 Info.MapTypesArray, 8716 /*Idx0=*/0, 8717 /*Idx1=*/0); 8718 } else { 8719 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8720 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8721 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8722 MapTypesArrayArg = 8723 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8724 } 8725 } 8726 8727 /// Check for inner distribute directive. 
8728 static const OMPExecutableDirective * 8729 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8730 const auto *CS = D.getInnermostCapturedStmt(); 8731 const auto *Body = 8732 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8733 const Stmt *ChildStmt = 8734 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8735 8736 if (const auto *NestedDir = 8737 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8738 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8739 switch (D.getDirectiveKind()) { 8740 case OMPD_target: 8741 if (isOpenMPDistributeDirective(DKind)) 8742 return NestedDir; 8743 if (DKind == OMPD_teams) { 8744 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8745 /*IgnoreCaptured=*/true); 8746 if (!Body) 8747 return nullptr; 8748 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8749 if (const auto *NND = 8750 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8751 DKind = NND->getDirectiveKind(); 8752 if (isOpenMPDistributeDirective(DKind)) 8753 return NND; 8754 } 8755 } 8756 return nullptr; 8757 case OMPD_target_teams: 8758 if (isOpenMPDistributeDirective(DKind)) 8759 return NestedDir; 8760 return nullptr; 8761 case OMPD_target_parallel: 8762 case OMPD_target_simd: 8763 case OMPD_target_parallel_for: 8764 case OMPD_target_parallel_for_simd: 8765 return nullptr; 8766 case OMPD_target_teams_distribute: 8767 case OMPD_target_teams_distribute_simd: 8768 case OMPD_target_teams_distribute_parallel_for: 8769 case OMPD_target_teams_distribute_parallel_for_simd: 8770 case OMPD_parallel: 8771 case OMPD_for: 8772 case OMPD_parallel_for: 8773 case OMPD_parallel_master: 8774 case OMPD_parallel_sections: 8775 case OMPD_for_simd: 8776 case OMPD_parallel_for_simd: 8777 case OMPD_cancel: 8778 case OMPD_cancellation_point: 8779 case OMPD_ordered: 8780 case OMPD_threadprivate: 8781 case OMPD_allocate: 8782 case OMPD_task: 8783 case OMPD_simd: 8784 case OMPD_sections: 8785 
case OMPD_section: 8786 case OMPD_single: 8787 case OMPD_master: 8788 case OMPD_critical: 8789 case OMPD_taskyield: 8790 case OMPD_barrier: 8791 case OMPD_taskwait: 8792 case OMPD_taskgroup: 8793 case OMPD_atomic: 8794 case OMPD_flush: 8795 case OMPD_teams: 8796 case OMPD_target_data: 8797 case OMPD_target_exit_data: 8798 case OMPD_target_enter_data: 8799 case OMPD_distribute: 8800 case OMPD_distribute_simd: 8801 case OMPD_distribute_parallel_for: 8802 case OMPD_distribute_parallel_for_simd: 8803 case OMPD_teams_distribute: 8804 case OMPD_teams_distribute_simd: 8805 case OMPD_teams_distribute_parallel_for: 8806 case OMPD_teams_distribute_parallel_for_simd: 8807 case OMPD_target_update: 8808 case OMPD_declare_simd: 8809 case OMPD_declare_variant: 8810 case OMPD_declare_target: 8811 case OMPD_end_declare_target: 8812 case OMPD_declare_reduction: 8813 case OMPD_declare_mapper: 8814 case OMPD_taskloop: 8815 case OMPD_taskloop_simd: 8816 case OMPD_master_taskloop: 8817 case OMPD_master_taskloop_simd: 8818 case OMPD_parallel_master_taskloop: 8819 case OMPD_parallel_master_taskloop_simd: 8820 case OMPD_requires: 8821 case OMPD_unknown: 8822 llvm_unreachable("Unexpected directive."); 8823 } 8824 } 8825 8826 return nullptr; 8827 } 8828 8829 /// Emit the user-defined mapper function. The code generation follows the 8830 /// pattern in the example below. 8831 /// \code 8832 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8833 /// void *base, void *begin, 8834 /// int64_t size, int64_t type) { 8835 /// // Allocate space for an array section first. 8836 /// if (size > 1 && !maptype.IsDelete) 8837 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8838 /// size*sizeof(Ty), clearToFrom(type)); 8839 /// // Map members. 
8840 /// for (unsigned i = 0; i < size; i++) { 8841 /// // For each component specified by this mapper: 8842 /// for (auto c : all_components) { 8843 /// if (c.hasMapper()) 8844 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8845 /// c.arg_type); 8846 /// else 8847 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8848 /// c.arg_begin, c.arg_size, c.arg_type); 8849 /// } 8850 /// } 8851 /// // Delete the array section. 8852 /// if (size > 1 && maptype.IsDelete) 8853 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8854 /// size*sizeof(Ty), clearToFrom(type)); 8855 /// } 8856 /// \endcode 8857 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8858 CodeGenFunction *CGF) { 8859 if (UDMMap.count(D) > 0) 8860 return; 8861 ASTContext &C = CGM.getContext(); 8862 QualType Ty = D->getType(); 8863 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8864 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8865 auto *MapperVarDecl = 8866 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8867 SourceLocation Loc = D->getLocation(); 8868 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8869 8870 // Prepare mapper function arguments and attributes. 
8871 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8872 C.VoidPtrTy, ImplicitParamDecl::Other); 8873 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 8874 ImplicitParamDecl::Other); 8875 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8876 C.VoidPtrTy, ImplicitParamDecl::Other); 8877 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8878 ImplicitParamDecl::Other); 8879 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8880 ImplicitParamDecl::Other); 8881 FunctionArgList Args; 8882 Args.push_back(&HandleArg); 8883 Args.push_back(&BaseArg); 8884 Args.push_back(&BeginArg); 8885 Args.push_back(&SizeArg); 8886 Args.push_back(&TypeArg); 8887 const CGFunctionInfo &FnInfo = 8888 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 8889 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 8890 SmallString<64> TyStr; 8891 llvm::raw_svector_ostream Out(TyStr); 8892 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 8893 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 8894 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 8895 Name, &CGM.getModule()); 8896 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 8897 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 8898 // Start the mapper function code generation. 8899 CodeGenFunction MapperCGF(CGM); 8900 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 8901 // Compute the starting and end addreses of array elements. 
8902 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 8903 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 8904 C.getPointerType(Int64Ty), Loc); 8905 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 8906 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 8907 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 8908 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 8909 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 8910 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 8911 C.getPointerType(Int64Ty), Loc); 8912 // Prepare common arguments for array initiation and deletion. 8913 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 8914 MapperCGF.GetAddrOfLocalVar(&HandleArg), 8915 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8916 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 8917 MapperCGF.GetAddrOfLocalVar(&BaseArg), 8918 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8919 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 8920 MapperCGF.GetAddrOfLocalVar(&BeginArg), 8921 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8922 8923 // Emit array initiation if this is an array section and \p MapType indicates 8924 // that memory allocation is required. 8925 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 8926 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 8927 ElementSize, HeadBB, /*IsInit=*/true); 8928 8929 // Emit a for loop to iterate through SizeArg of elements and map all of them. 8930 8931 // Emit the loop header block. 8932 MapperCGF.EmitBlock(HeadBB); 8933 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 8934 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 8935 // Evaluate whether the initial condition is satisfied. 
8936 llvm::Value *IsEmpty = 8937 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 8938 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 8939 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 8940 8941 // Emit the loop body block. 8942 MapperCGF.EmitBlock(BodyBB); 8943 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 8944 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 8945 PtrPHI->addIncoming(PtrBegin, EntryBB); 8946 Address PtrCurrent = 8947 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 8948 .getAlignment() 8949 .alignmentOfArrayElement(ElementSize)); 8950 // Privatize the declared variable of mapper to be the current array element. 8951 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 8952 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 8953 return MapperCGF 8954 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 8955 .getAddress(MapperCGF); 8956 }); 8957 (void)Scope.Privatize(); 8958 8959 // Get map clause information. Fill up the arrays with all mapped variables. 8960 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8961 MappableExprsHandler::MapValuesArrayTy Pointers; 8962 MappableExprsHandler::MapValuesArrayTy Sizes; 8963 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8964 MappableExprsHandler MEHandler(*D, MapperCGF); 8965 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 8966 8967 // Call the runtime API __tgt_mapper_num_components to get the number of 8968 // pre-existing components. 8969 llvm::Value *OffloadingArgs[] = {Handle}; 8970 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 8971 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 8972 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 8973 PreviousSize, 8974 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 8975 8976 // Fill up the runtime mapper handle for all components. 
8977 for (unsigned I = 0; I < BasePointers.size(); ++I) { 8978 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 8979 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8980 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 8981 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8982 llvm::Value *CurSizeArg = Sizes[I]; 8983 8984 // Extract the MEMBER_OF field from the map type. 8985 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 8986 MapperCGF.EmitBlock(MemberBB); 8987 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 8988 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 8989 OriMapType, 8990 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 8991 llvm::BasicBlock *MemberCombineBB = 8992 MapperCGF.createBasicBlock("omp.member.combine"); 8993 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 8994 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 8995 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 8996 // Add the number of pre-existing components to the MEMBER_OF field if it 8997 // is valid. 8998 MapperCGF.EmitBlock(MemberCombineBB); 8999 llvm::Value *CombinedMember = 9000 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9001 // Do nothing if it is not a member of previous components. 9002 MapperCGF.EmitBlock(TypeBB); 9003 llvm::PHINode *MemberMapType = 9004 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9005 MemberMapType->addIncoming(OriMapType, MemberBB); 9006 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9007 9008 // Combine the map type inherited from user-defined mapper with that 9009 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9010 // bits of the \a MapType, which is the input argument of the mapper 9011 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9012 // bits of MemberMapType. 
9013 // [OpenMP 5.0], 1.2.6. map-type decay. 9014 // | alloc | to | from | tofrom | release | delete 9015 // ---------------------------------------------------------- 9016 // alloc | alloc | alloc | alloc | alloc | release | delete 9017 // to | alloc | to | alloc | to | release | delete 9018 // from | alloc | alloc | from | from | release | delete 9019 // tofrom | alloc | to | from | tofrom | release | delete 9020 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9021 MapType, 9022 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9023 MappableExprsHandler::OMP_MAP_FROM)); 9024 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9025 llvm::BasicBlock *AllocElseBB = 9026 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9027 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9028 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9029 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9030 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9031 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9032 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9033 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9034 MapperCGF.EmitBlock(AllocBB); 9035 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9036 MemberMapType, 9037 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9038 MappableExprsHandler::OMP_MAP_FROM))); 9039 MapperCGF.Builder.CreateBr(EndBB); 9040 MapperCGF.EmitBlock(AllocElseBB); 9041 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9042 LeftToFrom, 9043 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9044 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9045 // In case of to, clear OMP_MAP_FROM. 
9046 MapperCGF.EmitBlock(ToBB); 9047 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9048 MemberMapType, 9049 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9050 MapperCGF.Builder.CreateBr(EndBB); 9051 MapperCGF.EmitBlock(ToElseBB); 9052 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9053 LeftToFrom, 9054 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9055 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9056 // In case of from, clear OMP_MAP_TO. 9057 MapperCGF.EmitBlock(FromBB); 9058 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9059 MemberMapType, 9060 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9061 // In case of tofrom, do nothing. 9062 MapperCGF.EmitBlock(EndBB); 9063 llvm::PHINode *CurMapType = 9064 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9065 CurMapType->addIncoming(AllocMapType, AllocBB); 9066 CurMapType->addIncoming(ToMapType, ToBB); 9067 CurMapType->addIncoming(FromMapType, FromBB); 9068 CurMapType->addIncoming(MemberMapType, ToElseBB); 9069 9070 // TODO: call the corresponding mapper function if a user-defined mapper is 9071 // associated with this map clause. 9072 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9073 // data structure. 9074 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9075 CurSizeArg, CurMapType}; 9076 MapperCGF.EmitRuntimeCall( 9077 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9078 OffloadingArgs); 9079 } 9080 9081 // Update the pointer to point to the next element that needs to be mapped, 9082 // and check whether we have mapped all elements. 
9083 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9084 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9085 PtrPHI->addIncoming(PtrNext, BodyBB); 9086 llvm::Value *IsDone = 9087 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9088 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9089 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9090 9091 MapperCGF.EmitBlock(ExitBB); 9092 // Emit array deletion if this is an array section and \p MapType indicates 9093 // that deletion is required. 9094 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9095 ElementSize, DoneBB, /*IsInit=*/false); 9096 9097 // Emit the function exit block. 9098 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9099 MapperCGF.FinishFunction(); 9100 UDMMap.try_emplace(D, Fn); 9101 if (CGF) { 9102 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9103 Decls.second.push_back(D); 9104 } 9105 } 9106 9107 /// Emit the array initialization or deletion portion for user-defined mapper 9108 /// code generation. First, it evaluates whether an array section is mapped and 9109 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9110 /// true, and \a MapType indicates to not delete this array, array 9111 /// initialization code is generated. If \a IsInit is false, and \a MapType 9112 /// indicates to not this array, array deletion code is generated. 9113 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9114 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9115 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9116 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9117 StringRef Prefix = IsInit ? ".init" : ".del"; 9118 9119 // Evaluate if this is an array section. 
9120 llvm::BasicBlock *IsDeleteBB = 9121 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); 9122 llvm::BasicBlock *BodyBB = 9123 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9124 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9125 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9126 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9127 9128 // Evaluate if we are going to delete this section. 9129 MapperCGF.EmitBlock(IsDeleteBB); 9130 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9131 MapType, 9132 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9133 llvm::Value *DeleteCond; 9134 if (IsInit) { 9135 DeleteCond = MapperCGF.Builder.CreateIsNull( 9136 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9137 } else { 9138 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9139 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9140 } 9141 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9142 9143 MapperCGF.EmitBlock(BodyBB); 9144 // Get the array size by multiplying element size and element number (i.e., \p 9145 // Size). 9146 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9147 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9148 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9149 // memory allocation/deletion purpose only. 9150 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9151 MapType, 9152 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9153 MappableExprsHandler::OMP_MAP_FROM))); 9154 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9155 // data structure. 
9156 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9157 MapperCGF.EmitRuntimeCall( 9158 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9159 } 9160 9161 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9162 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9163 llvm::Value *DeviceID, 9164 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9165 const OMPLoopDirective &D)> 9166 SizeEmitter) { 9167 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9168 const OMPExecutableDirective *TD = &D; 9169 // Get nested teams distribute kind directive, if any. 9170 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9171 TD = getNestedDistributeDirective(CGM.getContext(), D); 9172 if (!TD) 9173 return; 9174 const auto *LD = cast<OMPLoopDirective>(TD); 9175 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9176 PrePostActionTy &) { 9177 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9178 llvm::Value *Args[] = {DeviceID, NumIterations}; 9179 CGF.EmitRuntimeCall( 9180 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9181 } 9182 }; 9183 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9184 } 9185 9186 void CGOpenMPRuntime::emitTargetCall( 9187 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9188 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9189 const Expr *Device, 9190 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9191 const OMPLoopDirective &D)> 9192 SizeEmitter) { 9193 if (!CGF.HaveInsertPoint()) 9194 return; 9195 9196 assert(OutlinedFn && "Invalid outlined function!"); 9197 9198 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9199 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9200 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9201 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9202 PrePostActionTy &) { 9203 
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  // Collect the captured variables of the 'target' region up front, inside an
  // inlined region.
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // State shared between the region generators below: TargetThenGen fills
  // InputInfo and MapTypesArray, ThenGen consumes them.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      // No enclosed teams region: use the entry point without the team and
      // thread arguments.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value is treated as failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // With an outer task (depend clause present) the captured variables are
    // regenerated at this point instead of reusing the ones collected earlier
    // - presumably because this code is emitted in a different function;
    // confirm against EmitOMPTargetTaskBasedDirective.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    // Same regeneration of captured variables as in the offload-failed path.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Builds the map information for every capture, materializes the offloading
  // arrays, publishes them through InputInfo/MapTypesArray and then runs
  // ThenGen, either wrapped in a task or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the fields of the captured record and the captured
    // values in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    // The fields of Info are passed as the outputs, so after this call they
    // hold the argument values rather than the arrays themselves.
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only counterpart of TargetThenGen: runs ElseGen, wrapped in a task
  // when an outer task is required, with an empty OMPTargetDataInfo.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
9485 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9486 ParentName, Line)) 9487 return; 9488 9489 switch (E.getDirectiveKind()) { 9490 case OMPD_target: 9491 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9492 cast<OMPTargetDirective>(E)); 9493 break; 9494 case OMPD_target_parallel: 9495 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9496 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9497 break; 9498 case OMPD_target_teams: 9499 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9500 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9501 break; 9502 case OMPD_target_teams_distribute: 9503 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9504 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9505 break; 9506 case OMPD_target_teams_distribute_simd: 9507 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9508 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9509 break; 9510 case OMPD_target_parallel_for: 9511 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9512 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9513 break; 9514 case OMPD_target_parallel_for_simd: 9515 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9516 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9517 break; 9518 case OMPD_target_simd: 9519 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9520 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9521 break; 9522 case OMPD_target_teams_distribute_parallel_for: 9523 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9524 CGM, ParentName, 9525 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9526 break; 9527 case OMPD_target_teams_distribute_parallel_for_simd: 9528 CodeGenFunction:: 9529 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9530 CGM, ParentName, 9531 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9532 break; 9533 case OMPD_parallel: 
9534 case OMPD_for: 9535 case OMPD_parallel_for: 9536 case OMPD_parallel_master: 9537 case OMPD_parallel_sections: 9538 case OMPD_for_simd: 9539 case OMPD_parallel_for_simd: 9540 case OMPD_cancel: 9541 case OMPD_cancellation_point: 9542 case OMPD_ordered: 9543 case OMPD_threadprivate: 9544 case OMPD_allocate: 9545 case OMPD_task: 9546 case OMPD_simd: 9547 case OMPD_sections: 9548 case OMPD_section: 9549 case OMPD_single: 9550 case OMPD_master: 9551 case OMPD_critical: 9552 case OMPD_taskyield: 9553 case OMPD_barrier: 9554 case OMPD_taskwait: 9555 case OMPD_taskgroup: 9556 case OMPD_atomic: 9557 case OMPD_flush: 9558 case OMPD_teams: 9559 case OMPD_target_data: 9560 case OMPD_target_exit_data: 9561 case OMPD_target_enter_data: 9562 case OMPD_distribute: 9563 case OMPD_distribute_simd: 9564 case OMPD_distribute_parallel_for: 9565 case OMPD_distribute_parallel_for_simd: 9566 case OMPD_teams_distribute: 9567 case OMPD_teams_distribute_simd: 9568 case OMPD_teams_distribute_parallel_for: 9569 case OMPD_teams_distribute_parallel_for_simd: 9570 case OMPD_target_update: 9571 case OMPD_declare_simd: 9572 case OMPD_declare_variant: 9573 case OMPD_declare_target: 9574 case OMPD_end_declare_target: 9575 case OMPD_declare_reduction: 9576 case OMPD_declare_mapper: 9577 case OMPD_taskloop: 9578 case OMPD_taskloop_simd: 9579 case OMPD_master_taskloop: 9580 case OMPD_master_taskloop_simd: 9581 case OMPD_parallel_master_taskloop: 9582 case OMPD_parallel_master_taskloop_simd: 9583 case OMPD_requires: 9584 case OMPD_unknown: 9585 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9586 } 9587 return; 9588 } 9589 9590 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9591 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9592 return; 9593 9594 scanForTargetRegionsFunctions( 9595 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9596 return; 9597 } 9598 9599 // If this is a lambda function, look into its body. 
9600 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9601 S = L->getBody(); 9602 9603 // Keep looking for target regions recursively. 9604 for (const Stmt *II : S->children()) 9605 scanForTargetRegionsFunctions(II, ParentName); 9606 } 9607 9608 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9609 // If emitting code for the host, we do not process FD here. Instead we do 9610 // the normal code generation. 9611 if (!CGM.getLangOpts().OpenMPIsDevice) { 9612 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9613 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9614 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9615 // Do not emit device_type(nohost) functions for the host. 9616 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9617 return true; 9618 } 9619 return false; 9620 } 9621 9622 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9623 // Try to detect target regions in the function. 9624 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9625 StringRef Name = CGM.getMangledName(GD); 9626 scanForTargetRegionsFunctions(FD->getBody(), Name); 9627 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9628 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9629 // Do not emit device_type(nohost) functions for the host. 9630 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9631 return true; 9632 } 9633 9634 // Do not to emit function if it is not marked as declare target. 9635 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9636 AlreadyEmittedTargetDecls.count(VD) == 0; 9637 } 9638 9639 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9640 if (!CGM.getLangOpts().OpenMPIsDevice) 9641 return false; 9642 9643 // Check if there are Ctors/Dtors in this declaration and look for target 9644 // regions in it. We use the complete variant to produce the kernel name 9645 // mangling. 
9646 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9647 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9648 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9649 StringRef ParentName = 9650 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9651 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9652 } 9653 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9654 StringRef ParentName = 9655 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9656 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9657 } 9658 } 9659 9660 // Do not to emit variable if it is not marked as declare target. 9661 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9662 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9663 cast<VarDecl>(GD.getDecl())); 9664 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9665 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9666 HasRequiresUnifiedSharedMemory)) { 9667 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9668 return true; 9669 } 9670 return false; 9671 } 9672 9673 llvm::Constant * 9674 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9675 const VarDecl *VD) { 9676 assert(VD->getType().isConstant(CGM.getContext()) && 9677 "Expected constant variable."); 9678 StringRef VarName; 9679 llvm::Constant *Addr; 9680 llvm::GlobalValue::LinkageTypes Linkage; 9681 QualType Ty = VD->getType(); 9682 SmallString<128> Buffer; 9683 { 9684 unsigned DeviceID; 9685 unsigned FileID; 9686 unsigned Line; 9687 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9688 FileID, Line); 9689 llvm::raw_svector_ostream OS(Buffer); 9690 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9691 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9692 VarName = OS.str(); 9693 } 9694 Linkage = llvm::GlobalValue::InternalLinkage; 9695 Addr = 9696 
getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9697 getDefaultFirstprivateAddressSpace()); 9698 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9699 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9700 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9701 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9702 VarName, Addr, VarSize, 9703 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9704 return Addr; 9705 } 9706 9707 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9708 llvm::Constant *Addr) { 9709 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9710 !CGM.getLangOpts().OpenMPIsDevice) 9711 return; 9712 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9713 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9714 if (!Res) { 9715 if (CGM.getLangOpts().OpenMPIsDevice) { 9716 // Register non-target variables being emitted in device code (debug info 9717 // may cause this). 9718 StringRef VarName = CGM.getMangledName(VD); 9719 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9720 } 9721 return; 9722 } 9723 // Register declare target variables. 9724 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9725 StringRef VarName; 9726 CharUnits VarSize; 9727 llvm::GlobalValue::LinkageTypes Linkage; 9728 9729 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9730 !HasRequiresUnifiedSharedMemory) { 9731 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9732 VarName = CGM.getMangledName(VD); 9733 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9734 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9735 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9736 } else { 9737 VarSize = CharUnits::Zero(); 9738 } 9739 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9740 // Temp solution to prevent optimizations of the internal variables. 
9741 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9742 std::string RefName = getName({VarName, "ref"}); 9743 if (!CGM.GetGlobalValue(RefName)) { 9744 llvm::Constant *AddrRef = 9745 getOrCreateInternalVariable(Addr->getType(), RefName); 9746 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9747 GVAddrRef->setConstant(/*Val=*/true); 9748 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9749 GVAddrRef->setInitializer(Addr); 9750 CGM.addCompilerUsedGlobal(GVAddrRef); 9751 } 9752 } 9753 } else { 9754 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9755 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9756 HasRequiresUnifiedSharedMemory)) && 9757 "Declare target attribute must link or to with unified memory."); 9758 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9759 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9760 else 9761 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9762 9763 if (CGM.getLangOpts().OpenMPIsDevice) { 9764 VarName = Addr->getName(); 9765 Addr = nullptr; 9766 } else { 9767 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9768 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9769 } 9770 VarSize = CGM.getPointerSize(); 9771 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9772 } 9773 9774 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9775 VarName, Addr, VarSize, Flags, Linkage); 9776 } 9777 9778 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9779 if (isa<FunctionDecl>(GD.getDecl()) || 9780 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9781 return emitTargetFunctions(GD); 9782 9783 return emitTargetGlobalVariable(GD); 9784 } 9785 9786 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9787 for (const VarDecl *VD : DeferredGlobalVariables) { 9788 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9789 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9790 if (!Res) 9791 continue; 9792 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9793 !HasRequiresUnifiedSharedMemory) { 9794 CGM.EmitGlobal(VD); 9795 } else { 9796 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9797 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9798 HasRequiresUnifiedSharedMemory)) && 9799 "Expected link clause or to clause with unified memory."); 9800 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9801 } 9802 } 9803 } 9804 9805 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9806 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9807 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9808 " Expected target-based directive."); 9809 } 9810 9811 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 9812 for (const OMPClause *Clause : D->clauselists()) { 9813 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9814 HasRequiresUnifiedSharedMemory = true; 9815 } else if (const auto *AC = 9816 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 9817 switch (AC->getAtomicDefaultMemOrderKind()) { 9818 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 9819 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 9820 break; 9821 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 9822 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 9823 break; 9824 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 9825 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 9826 break; 9827 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 9828 break; 9829 } 9830 } 9831 } 9832 } 9833 9834 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 9835 return RequiresAtomicOrdering; 9836 } 9837 9838 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9839 LangAS &AS) { 9840 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9841 return false; 9842 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9843 switch(A->getAllocatorType()) { 9844 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9845 // Not 
supported, fallback to the default mem space. 9846 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9847 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9848 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9849 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9850 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9851 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9852 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9853 AS = LangAS::Default; 9854 return true; 9855 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9856 llvm_unreachable("Expected predefined allocator for the variables with the " 9857 "static storage."); 9858 } 9859 return false; 9860 } 9861 9862 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9863 return HasRequiresUnifiedSharedMemory; 9864 } 9865 9866 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9867 CodeGenModule &CGM) 9868 : CGM(CGM) { 9869 if (CGM.getLangOpts().OpenMPIsDevice) { 9870 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9871 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9872 } 9873 } 9874 9875 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9876 if (CGM.getLangOpts().OpenMPIsDevice) 9877 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9878 } 9879 9880 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9881 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9882 return true; 9883 9884 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9885 // Do not to emit function if it is marked as declare target as it was already 9886 // emitted. 
9887 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9888 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 9889 if (auto *F = dyn_cast_or_null<llvm::Function>( 9890 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 9891 return !F->isDeclaration(); 9892 return false; 9893 } 9894 return true; 9895 } 9896 9897 return !AlreadyEmittedTargetDecls.insert(D).second; 9898 } 9899 9900 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9901 // If we don't have entries or if we are emitting code for the device, we 9902 // don't need to do anything. 9903 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9904 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9905 (OffloadEntriesInfoManager.empty() && 9906 !HasEmittedDeclareTargetRegion && 9907 !HasEmittedTargetRegion)) 9908 return nullptr; 9909 9910 // Create and register the function that handles the requires directives. 9911 ASTContext &C = CGM.getContext(); 9912 9913 llvm::Function *RequiresRegFn; 9914 { 9915 CodeGenFunction CGF(CGM); 9916 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 9917 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 9918 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 9919 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 9920 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 9921 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 9922 // TODO: check for other requires clauses. 9923 // The requires directive takes effect only when a target region is 9924 // present in the compilation unit. Otherwise it is ignored and not 9925 // passed to the runtime. This avoids the runtime from throwing an error 9926 // for mismatching requires clauses across compilation units that don't 9927 // contain at least 1 target region. 
9928 assert((HasEmittedTargetRegion || 9929 HasEmittedDeclareTargetRegion || 9930 !OffloadEntriesInfoManager.empty()) && 9931 "Target or declare target region expected."); 9932 if (HasRequiresUnifiedSharedMemory) 9933 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 9934 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 9935 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 9936 CGF.FinishFunction(); 9937 } 9938 return RequiresRegFn; 9939 } 9940 9941 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9942 const OMPExecutableDirective &D, 9943 SourceLocation Loc, 9944 llvm::Function *OutlinedFn, 9945 ArrayRef<llvm::Value *> CapturedVars) { 9946 if (!CGF.HaveInsertPoint()) 9947 return; 9948 9949 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9950 CodeGenFunction::RunCleanupsScope Scope(CGF); 9951 9952 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9953 llvm::Value *Args[] = { 9954 RTLoc, 9955 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9956 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9957 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9958 RealArgs.append(std::begin(Args), std::end(Args)); 9959 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9960 9961 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9962 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9963 } 9964 9965 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9966 const Expr *NumTeams, 9967 const Expr *ThreadLimit, 9968 SourceLocation Loc) { 9969 if (!CGF.HaveInsertPoint()) 9970 return; 9971 9972 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9973 9974 llvm::Value *NumTeamsVal = 9975 NumTeams 9976 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9977 CGF.CGM.Int32Ty, /* isSigned = */ true) 9978 : CGF.Builder.getInt32(0); 9979 9980 llvm::Value *ThreadLimitVal = 9981 ThreadLimit 9982 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 9983 CGF.CGM.Int32Ty, /* isSigned = */ true) 9984 : CGF.Builder.getInt32(0); 9985 9986 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 9987 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 9988 ThreadLimitVal}; 9989 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 9990 PushNumTeamsArgs); 9991 } 9992 9993 void CGOpenMPRuntime::emitTargetDataCalls( 9994 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9995 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9996 if (!CGF.HaveInsertPoint()) 9997 return; 9998 9999 // Action used to replace the default codegen action and turn privatization 10000 // off. 10001 PrePostActionTy NoPrivAction; 10002 10003 // Generate the code for the opening of the data environment. Capture all the 10004 // arguments of the runtime call by reference because they are used in the 10005 // closing of the region. 10006 auto &&BeginThenGen = [this, &D, Device, &Info, 10007 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10008 // Fill up the arrays with all the mapped variables. 10009 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10010 MappableExprsHandler::MapValuesArrayTy Pointers; 10011 MappableExprsHandler::MapValuesArrayTy Sizes; 10012 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10013 10014 // Get map clause information. 10015 MappableExprsHandler MCHandler(D, CGF); 10016 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10017 10018 // Fill up the arrays and create the arguments. 
10019 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10020 10021 llvm::Value *BasePointersArrayArg = nullptr; 10022 llvm::Value *PointersArrayArg = nullptr; 10023 llvm::Value *SizesArrayArg = nullptr; 10024 llvm::Value *MapTypesArrayArg = nullptr; 10025 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10026 SizesArrayArg, MapTypesArrayArg, Info); 10027 10028 // Emit device ID if any. 10029 llvm::Value *DeviceID = nullptr; 10030 if (Device) { 10031 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10032 CGF.Int64Ty, /*isSigned=*/true); 10033 } else { 10034 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10035 } 10036 10037 // Emit the number of elements in the offloading arrays. 10038 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10039 10040 llvm::Value *OffloadingArgs[] = { 10041 DeviceID, PointerNum, BasePointersArrayArg, 10042 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10043 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10044 OffloadingArgs); 10045 10046 // If device pointer privatization is required, emit the body of the region 10047 // here. It will have to be duplicated: with and without privatization. 10048 if (!Info.CaptureDeviceAddrMap.empty()) 10049 CodeGen(CGF); 10050 }; 10051 10052 // Generate code for the closing of the data region. 10053 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10054 PrePostActionTy &) { 10055 assert(Info.isValid() && "Invalid data environment closing arguments."); 10056 10057 llvm::Value *BasePointersArrayArg = nullptr; 10058 llvm::Value *PointersArrayArg = nullptr; 10059 llvm::Value *SizesArrayArg = nullptr; 10060 llvm::Value *MapTypesArrayArg = nullptr; 10061 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10062 SizesArrayArg, MapTypesArrayArg, Info); 10063 10064 // Emit device ID if any. 
10065 llvm::Value *DeviceID = nullptr; 10066 if (Device) { 10067 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10068 CGF.Int64Ty, /*isSigned=*/true); 10069 } else { 10070 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10071 } 10072 10073 // Emit the number of elements in the offloading arrays. 10074 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10075 10076 llvm::Value *OffloadingArgs[] = { 10077 DeviceID, PointerNum, BasePointersArrayArg, 10078 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10079 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10080 OffloadingArgs); 10081 }; 10082 10083 // If we need device pointer privatization, we need to emit the body of the 10084 // region with no privatization in the 'else' branch of the conditional. 10085 // Otherwise, we don't have to do anything. 10086 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10087 PrePostActionTy &) { 10088 if (!Info.CaptureDeviceAddrMap.empty()) { 10089 CodeGen.setAction(NoPrivAction); 10090 CodeGen(CGF); 10091 } 10092 }; 10093 10094 // We don't have to do anything to close the region if the if clause evaluates 10095 // to false. 10096 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10097 10098 if (IfCond) { 10099 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10100 } else { 10101 RegionCodeGenTy RCG(BeginThenGen); 10102 RCG(CGF); 10103 } 10104 10105 // If we don't require privatization of device pointers, we emit the body in 10106 // between the runtime calls. This avoids duplicating the body code. 
10107 if (Info.CaptureDeviceAddrMap.empty()) { 10108 CodeGen.setAction(NoPrivAction); 10109 CodeGen(CGF); 10110 } 10111 10112 if (IfCond) { 10113 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10114 } else { 10115 RegionCodeGenTy RCG(EndThenGen); 10116 RCG(CGF); 10117 } 10118 } 10119 10120 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10121 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10122 const Expr *Device) { 10123 if (!CGF.HaveInsertPoint()) 10124 return; 10125 10126 assert((isa<OMPTargetEnterDataDirective>(D) || 10127 isa<OMPTargetExitDataDirective>(D) || 10128 isa<OMPTargetUpdateDirective>(D)) && 10129 "Expecting either target enter, exit data, or update directives."); 10130 10131 CodeGenFunction::OMPTargetDataInfo InputInfo; 10132 llvm::Value *MapTypesArray = nullptr; 10133 // Generate the code for the opening of the data environment. 10134 auto &&ThenGen = [this, &D, Device, &InputInfo, 10135 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10136 // Emit device ID if any. 10137 llvm::Value *DeviceID = nullptr; 10138 if (Device) { 10139 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10140 CGF.Int64Ty, /*isSigned=*/true); 10141 } else { 10142 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10143 } 10144 10145 // Emit the number of elements in the offloading arrays. 10146 llvm::Constant *PointerNum = 10147 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10148 10149 llvm::Value *OffloadingArgs[] = {DeviceID, 10150 PointerNum, 10151 InputInfo.BasePointersArray.getPointer(), 10152 InputInfo.PointersArray.getPointer(), 10153 InputInfo.SizesArray.getPointer(), 10154 MapTypesArray}; 10155 10156 // Select the right runtime function call for each expected standalone 10157 // directive. 10158 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10159 OpenMPRTLFunction RTLFn; 10160 switch (D.getDirectiveKind()) { 10161 case OMPD_target_enter_data: 10162 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_begin_nowait 10163 : OMPRTL__tgt_target_data_begin; 10164 break; 10165 case OMPD_target_exit_data: 10166 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10167 : OMPRTL__tgt_target_data_end; 10168 break; 10169 case OMPD_target_update: 10170 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10171 : OMPRTL__tgt_target_data_update; 10172 break; 10173 case OMPD_parallel: 10174 case OMPD_for: 10175 case OMPD_parallel_for: 10176 case OMPD_parallel_master: 10177 case OMPD_parallel_sections: 10178 case OMPD_for_simd: 10179 case OMPD_parallel_for_simd: 10180 case OMPD_cancel: 10181 case OMPD_cancellation_point: 10182 case OMPD_ordered: 10183 case OMPD_threadprivate: 10184 case OMPD_allocate: 10185 case OMPD_task: 10186 case OMPD_simd: 10187 case OMPD_sections: 10188 case OMPD_section: 10189 case OMPD_single: 10190 case OMPD_master: 10191 case OMPD_critical: 10192 case OMPD_taskyield: 10193 case OMPD_barrier: 10194 case OMPD_taskwait: 10195 case OMPD_taskgroup: 10196 case OMPD_atomic: 10197 case OMPD_flush: 10198 case OMPD_teams: 10199 case OMPD_target_data: 10200 case OMPD_distribute: 10201 case OMPD_distribute_simd: 10202 case OMPD_distribute_parallel_for: 10203 case OMPD_distribute_parallel_for_simd: 10204 case OMPD_teams_distribute: 10205 case OMPD_teams_distribute_simd: 10206 case OMPD_teams_distribute_parallel_for: 10207 case OMPD_teams_distribute_parallel_for_simd: 10208 case OMPD_declare_simd: 10209 case OMPD_declare_variant: 10210 case OMPD_declare_target: 10211 case OMPD_end_declare_target: 10212 case OMPD_declare_reduction: 10213 case OMPD_declare_mapper: 10214 case OMPD_taskloop: 10215 case OMPD_taskloop_simd: 10216 case OMPD_master_taskloop: 10217 case OMPD_master_taskloop_simd: 10218 case OMPD_parallel_master_taskloop: 10219 case OMPD_parallel_master_taskloop_simd: 10220 case OMPD_target: 10221 case OMPD_target_simd: 10222 case OMPD_target_teams_distribute: 10223 case OMPD_target_teams_distribute_simd: 10224 case 
OMPD_target_teams_distribute_parallel_for: 10225 case OMPD_target_teams_distribute_parallel_for_simd: 10226 case OMPD_target_teams: 10227 case OMPD_target_parallel: 10228 case OMPD_target_parallel_for: 10229 case OMPD_target_parallel_for_simd: 10230 case OMPD_requires: 10231 case OMPD_unknown: 10232 llvm_unreachable("Unexpected standalone target data directive."); 10233 break; 10234 } 10235 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10236 }; 10237 10238 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10239 CodeGenFunction &CGF, PrePostActionTy &) { 10240 // Fill up the arrays with all the mapped variables. 10241 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10242 MappableExprsHandler::MapValuesArrayTy Pointers; 10243 MappableExprsHandler::MapValuesArrayTy Sizes; 10244 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10245 10246 // Get map clause information. 10247 MappableExprsHandler MEHandler(D, CGF); 10248 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10249 10250 TargetDataInfo Info; 10251 // Fill up the arrays and create the arguments. 
10252 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10253 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10254 Info.PointersArray, Info.SizesArray, 10255 Info.MapTypesArray, Info); 10256 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10257 InputInfo.BasePointersArray = 10258 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10259 InputInfo.PointersArray = 10260 Address(Info.PointersArray, CGM.getPointerAlign()); 10261 InputInfo.SizesArray = 10262 Address(Info.SizesArray, CGM.getPointerAlign()); 10263 MapTypesArray = Info.MapTypesArray; 10264 if (D.hasClausesOfKind<OMPDependClause>()) 10265 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10266 else 10267 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10268 }; 10269 10270 if (IfCond) { 10271 emitIfClause(CGF, IfCond, TargetThenGen, 10272 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10273 } else { 10274 RegionCodeGenTy ThenRCG(TargetThenGen); 10275 ThenRCG(CGF); 10276 } 10277 } 10278 10279 namespace { 10280 /// Kind of parameter in a function with 'declare simd' directive. 10281 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10282 /// Attribute set of the parameter. 10283 struct ParamAttrTy { 10284 ParamKindTy Kind = Vector; 10285 llvm::APSInt StrideOrArg; 10286 llvm::APSInt Alignment; 10287 }; 10288 } // namespace 10289 10290 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10291 ArrayRef<ParamAttrTy> ParamAttrs) { 10292 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10293 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10294 // of that clause. The VLEN value must be power of 2. 10295 // In other case the notion of the function`s "characteristic data type" (CDT) 10296 // is used to compute the vector length. 10297 // CDT is defined in the following order: 10298 // a) For non-void function, the CDT is the return type. 
10299 // b) If the function has any non-uniform, non-linear parameters, then the 10300 // CDT is the type of the first such parameter. 10301 // c) If the CDT determined by a) or b) above is struct, union, or class 10302 // type which is pass-by-value (except for the type that maps to the 10303 // built-in complex data type), the characteristic data type is int. 10304 // d) If none of the above three cases is applicable, the CDT is int. 10305 // The VLEN is then determined based on the CDT and the size of vector 10306 // register of that ISA for which current vector version is generated. The 10307 // VLEN is computed using the formula below: 10308 // VLEN = sizeof(vector_register) / sizeof(CDT), 10309 // where vector register size specified in section 3.2.1 Registers and the 10310 // Stack Frame of original AMD64 ABI document. 10311 QualType RetType = FD->getReturnType(); 10312 if (RetType.isNull()) 10313 return 0; 10314 ASTContext &C = FD->getASTContext(); 10315 QualType CDT; 10316 if (!RetType.isNull() && !RetType->isVoidType()) { 10317 CDT = RetType; 10318 } else { 10319 unsigned Offset = 0; 10320 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10321 if (ParamAttrs[Offset].Kind == Vector) 10322 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10323 ++Offset; 10324 } 10325 if (CDT.isNull()) { 10326 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10327 if (ParamAttrs[I + Offset].Kind == Vector) { 10328 CDT = FD->getParamDecl(I)->getType(); 10329 break; 10330 } 10331 } 10332 } 10333 } 10334 if (CDT.isNull()) 10335 CDT = C.IntTy; 10336 CDT = CDT->getCanonicalTypeUnqualified(); 10337 if (CDT->isRecordType() || CDT->isUnionType()) 10338 CDT = C.IntTy; 10339 return C.getTypeSize(CDT); 10340 } 10341 10342 static void 10343 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10344 const llvm::APSInt &VLENVal, 10345 ArrayRef<ParamAttrTy> ParamAttrs, 10346 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10347 struct ISADataTy { 
10348 char ISA; 10349 unsigned VecRegSize; 10350 }; 10351 ISADataTy ISAData[] = { 10352 { 10353 'b', 128 10354 }, // SSE 10355 { 10356 'c', 256 10357 }, // AVX 10358 { 10359 'd', 256 10360 }, // AVX2 10361 { 10362 'e', 512 10363 }, // AVX512 10364 }; 10365 llvm::SmallVector<char, 2> Masked; 10366 switch (State) { 10367 case OMPDeclareSimdDeclAttr::BS_Undefined: 10368 Masked.push_back('N'); 10369 Masked.push_back('M'); 10370 break; 10371 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10372 Masked.push_back('N'); 10373 break; 10374 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10375 Masked.push_back('M'); 10376 break; 10377 } 10378 for (char Mask : Masked) { 10379 for (const ISADataTy &Data : ISAData) { 10380 SmallString<256> Buffer; 10381 llvm::raw_svector_ostream Out(Buffer); 10382 Out << "_ZGV" << Data.ISA << Mask; 10383 if (!VLENVal) { 10384 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10385 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10386 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10387 } else { 10388 Out << VLENVal; 10389 } 10390 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10391 switch (ParamAttr.Kind){ 10392 case LinearWithVarStride: 10393 Out << 's' << ParamAttr.StrideOrArg; 10394 break; 10395 case Linear: 10396 Out << 'l'; 10397 if (!!ParamAttr.StrideOrArg) 10398 Out << ParamAttr.StrideOrArg; 10399 break; 10400 case Uniform: 10401 Out << 'u'; 10402 break; 10403 case Vector: 10404 Out << 'v'; 10405 break; 10406 } 10407 if (!!ParamAttr.Alignment) 10408 Out << 'a' << ParamAttr.Alignment; 10409 } 10410 Out << '_' << Fn->getName(); 10411 Fn->addFnAttr(Out.str()); 10412 } 10413 } 10414 } 10415 10416 // This are the Functions that are needed to mangle the name of the 10417 // vector functions generated by the compiler, according to the rules 10418 // defined in the "Vector Function ABI specifications for AArch64", 10419 // available at 10420 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10421 10422 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10423 /// 10424 /// TODO: Need to implement the behavior for reference marked with a 10425 /// var or no linear modifiers (1.b in the section). For this, we 10426 /// need to extend ParamKindTy to support the linear modifiers. 10427 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10428 QT = QT.getCanonicalType(); 10429 10430 if (QT->isVoidType()) 10431 return false; 10432 10433 if (Kind == ParamKindTy::Uniform) 10434 return false; 10435 10436 if (Kind == ParamKindTy::Linear) 10437 return false; 10438 10439 // TODO: Handle linear references with modifiers 10440 10441 if (Kind == ParamKindTy::LinearWithVarStride) 10442 return false; 10443 10444 return true; 10445 } 10446 10447 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10448 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10449 QT = QT.getCanonicalType(); 10450 unsigned Size = C.getTypeSize(QT); 10451 10452 // Only scalars and complex within 16 bytes wide set PVB to true. 10453 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10454 return false; 10455 10456 if (QT->isFloatingType()) 10457 return true; 10458 10459 if (QT->isIntegerType()) 10460 return true; 10461 10462 if (QT->isPointerType()) 10463 return true; 10464 10465 // TODO: Add support for complex types (section 3.1.2, item 2). 10466 10467 return false; 10468 } 10469 10470 /// Computes the lane size (LS) of a return type or of an input parameter, 10471 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10472 /// TODO: Add support for references, section 3.2.1, item 1. 
10473 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10474 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10475 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10476 if (getAArch64PBV(PTy, C)) 10477 return C.getTypeSize(PTy); 10478 } 10479 if (getAArch64PBV(QT, C)) 10480 return C.getTypeSize(QT); 10481 10482 return C.getTypeSize(C.getUIntPtrType()); 10483 } 10484 10485 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10486 // signature of the scalar function, as defined in 3.2.2 of the 10487 // AAVFABI. 10488 static std::tuple<unsigned, unsigned, bool> 10489 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10490 QualType RetType = FD->getReturnType().getCanonicalType(); 10491 10492 ASTContext &C = FD->getASTContext(); 10493 10494 bool OutputBecomesInput = false; 10495 10496 llvm::SmallVector<unsigned, 8> Sizes; 10497 if (!RetType->isVoidType()) { 10498 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10499 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10500 OutputBecomesInput = true; 10501 } 10502 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10503 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10504 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10505 } 10506 10507 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10508 // The LS of a function parameter / return value can only be a power 10509 // of 2, starting from 8 bits, up to 128. 
10510 assert(std::all_of(Sizes.begin(), Sizes.end(), 10511 [](unsigned Size) { 10512 return Size == 8 || Size == 16 || Size == 32 || 10513 Size == 64 || Size == 128; 10514 }) && 10515 "Invalid size"); 10516 10517 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10518 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10519 OutputBecomesInput); 10520 } 10521 10522 /// Mangle the parameter part of the vector function name according to 10523 /// their OpenMP classification. The mangling function is defined in 10524 /// section 3.5 of the AAVFABI. 10525 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10526 SmallString<256> Buffer; 10527 llvm::raw_svector_ostream Out(Buffer); 10528 for (const auto &ParamAttr : ParamAttrs) { 10529 switch (ParamAttr.Kind) { 10530 case LinearWithVarStride: 10531 Out << "ls" << ParamAttr.StrideOrArg; 10532 break; 10533 case Linear: 10534 Out << 'l'; 10535 // Don't print the step value if it is not present or if it is 10536 // equal to 1. 10537 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10538 Out << ParamAttr.StrideOrArg; 10539 break; 10540 case Uniform: 10541 Out << 'u'; 10542 break; 10543 case Vector: 10544 Out << 'v'; 10545 break; 10546 } 10547 10548 if (!!ParamAttr.Alignment) 10549 Out << 'a' << ParamAttr.Alignment; 10550 } 10551 10552 return std::string(Out.str()); 10553 } 10554 10555 // Function used to add the attribute. The parameter `VLEN` is 10556 // templated to allow the use of "x" when targeting scalable functions 10557 // for SVE. 
10558 template <typename T> 10559 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10560 char ISA, StringRef ParSeq, 10561 StringRef MangledName, bool OutputBecomesInput, 10562 llvm::Function *Fn) { 10563 SmallString<256> Buffer; 10564 llvm::raw_svector_ostream Out(Buffer); 10565 Out << Prefix << ISA << LMask << VLEN; 10566 if (OutputBecomesInput) 10567 Out << "v"; 10568 Out << ParSeq << "_" << MangledName; 10569 Fn->addFnAttr(Out.str()); 10570 } 10571 10572 // Helper function to generate the Advanced SIMD names depending on 10573 // the value of the NDS when simdlen is not present. 10574 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10575 StringRef Prefix, char ISA, 10576 StringRef ParSeq, StringRef MangledName, 10577 bool OutputBecomesInput, 10578 llvm::Function *Fn) { 10579 switch (NDS) { 10580 case 8: 10581 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10582 OutputBecomesInput, Fn); 10583 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10584 OutputBecomesInput, Fn); 10585 break; 10586 case 16: 10587 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10588 OutputBecomesInput, Fn); 10589 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10590 OutputBecomesInput, Fn); 10591 break; 10592 case 32: 10593 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10594 OutputBecomesInput, Fn); 10595 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10596 OutputBecomesInput, Fn); 10597 break; 10598 case 64: 10599 case 128: 10600 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10601 OutputBecomesInput, Fn); 10602 break; 10603 default: 10604 llvm_unreachable("Scalar type is too wide."); 10605 } 10606 } 10607 10608 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10609 static void emitAArch64DeclareSimdFunction( 10610 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10611 ArrayRef<ParamAttrTy> ParamAttrs, 10612 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10613 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10614 10615 // Get basic data for building the vector signature. 10616 const auto Data = getNDSWDS(FD, ParamAttrs); 10617 const unsigned NDS = std::get<0>(Data); 10618 const unsigned WDS = std::get<1>(Data); 10619 const bool OutputBecomesInput = std::get<2>(Data); 10620 10621 // Check the values provided via `simdlen` by the user. 10622 // 1. A `simdlen(1)` doesn't produce vector signatures, 10623 if (UserVLEN == 1) { 10624 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10625 DiagnosticsEngine::Warning, 10626 "The clause simdlen(1) has no effect when targeting aarch64."); 10627 CGM.getDiags().Report(SLoc, DiagID); 10628 return; 10629 } 10630 10631 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10632 // Advanced SIMD output. 10633 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10634 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10635 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10636 "power of 2 when targeting Advanced SIMD."); 10637 CGM.getDiags().Report(SLoc, DiagID); 10638 return; 10639 } 10640 10641 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10642 // limits. 10643 if (ISA == 's' && UserVLEN != 0) { 10644 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10645 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10646 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10647 "lanes in the architectural constraints " 10648 "for SVE (min is 128-bit, max is " 10649 "2048-bit, by steps of 128-bit)"); 10650 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10651 return; 10652 } 10653 } 10654 10655 // Sort out parameter sequence. 
10656 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10657 StringRef Prefix = "_ZGV"; 10658 // Generate simdlen from user input (if any). 10659 if (UserVLEN) { 10660 if (ISA == 's') { 10661 // SVE generates only a masked function. 10662 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10663 OutputBecomesInput, Fn); 10664 } else { 10665 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10666 // Advanced SIMD generates one or two functions, depending on 10667 // the `[not]inbranch` clause. 10668 switch (State) { 10669 case OMPDeclareSimdDeclAttr::BS_Undefined: 10670 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10671 OutputBecomesInput, Fn); 10672 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10673 OutputBecomesInput, Fn); 10674 break; 10675 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10676 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10677 OutputBecomesInput, Fn); 10678 break; 10679 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10680 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10681 OutputBecomesInput, Fn); 10682 break; 10683 } 10684 } 10685 } else { 10686 // If no user simdlen is provided, follow the AAVFABI rules for 10687 // generating the vector length. 10688 if (ISA == 's') { 10689 // SVE, section 3.4.1, item 1. 10690 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10691 OutputBecomesInput, Fn); 10692 } else { 10693 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10694 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10695 // two vector names depending on the use of the clause 10696 // `[not]inbranch`. 
10697 switch (State) { 10698 case OMPDeclareSimdDeclAttr::BS_Undefined: 10699 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10700 OutputBecomesInput, Fn); 10701 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10702 OutputBecomesInput, Fn); 10703 break; 10704 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10705 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10706 OutputBecomesInput, Fn); 10707 break; 10708 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10709 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10710 OutputBecomesInput, Fn); 10711 break; 10712 } 10713 } 10714 } 10715 } 10716 10717 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10718 llvm::Function *Fn) { 10719 ASTContext &C = CGM.getContext(); 10720 FD = FD->getMostRecentDecl(); 10721 // Map params to their positions in function decl. 10722 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10723 if (isa<CXXMethodDecl>(FD)) 10724 ParamPositions.try_emplace(FD, 0); 10725 unsigned ParamPos = ParamPositions.size(); 10726 for (const ParmVarDecl *P : FD->parameters()) { 10727 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10728 ++ParamPos; 10729 } 10730 while (FD) { 10731 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10732 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10733 // Mark uniform parameters. 10734 for (const Expr *E : Attr->uniforms()) { 10735 E = E->IgnoreParenImpCasts(); 10736 unsigned Pos; 10737 if (isa<CXXThisExpr>(E)) { 10738 Pos = ParamPositions[FD]; 10739 } else { 10740 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10741 ->getCanonicalDecl(); 10742 Pos = ParamPositions[PVD]; 10743 } 10744 ParamAttrs[Pos].Kind = Uniform; 10745 } 10746 // Get alignment info. 
10747 auto NI = Attr->alignments_begin(); 10748 for (const Expr *E : Attr->aligneds()) { 10749 E = E->IgnoreParenImpCasts(); 10750 unsigned Pos; 10751 QualType ParmTy; 10752 if (isa<CXXThisExpr>(E)) { 10753 Pos = ParamPositions[FD]; 10754 ParmTy = E->getType(); 10755 } else { 10756 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10757 ->getCanonicalDecl(); 10758 Pos = ParamPositions[PVD]; 10759 ParmTy = PVD->getType(); 10760 } 10761 ParamAttrs[Pos].Alignment = 10762 (*NI) 10763 ? (*NI)->EvaluateKnownConstInt(C) 10764 : llvm::APSInt::getUnsigned( 10765 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10766 .getQuantity()); 10767 ++NI; 10768 } 10769 // Mark linear parameters. 10770 auto SI = Attr->steps_begin(); 10771 auto MI = Attr->modifiers_begin(); 10772 for (const Expr *E : Attr->linears()) { 10773 E = E->IgnoreParenImpCasts(); 10774 unsigned Pos; 10775 if (isa<CXXThisExpr>(E)) { 10776 Pos = ParamPositions[FD]; 10777 } else { 10778 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10779 ->getCanonicalDecl(); 10780 Pos = ParamPositions[PVD]; 10781 } 10782 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10783 ParamAttr.Kind = Linear; 10784 if (*SI) { 10785 Expr::EvalResult Result; 10786 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10787 if (const auto *DRE = 10788 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10789 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10790 ParamAttr.Kind = LinearWithVarStride; 10791 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10792 ParamPositions[StridePVD->getCanonicalDecl()]); 10793 } 10794 } 10795 } else { 10796 ParamAttr.StrideOrArg = Result.Val.getInt(); 10797 } 10798 } 10799 ++SI; 10800 ++MI; 10801 } 10802 llvm::APSInt VLENVal; 10803 SourceLocation ExprLoc; 10804 const Expr *VLENExpr = Attr->getSimdlen(); 10805 if (VLENExpr) { 10806 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10807 ExprLoc = VLENExpr->getExprLoc(); 10808 } 10809 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10810 if (CGM.getTriple().isX86()) { 10811 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10812 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10813 unsigned VLEN = VLENVal.getExtValue(); 10814 StringRef MangledName = Fn->getName(); 10815 if (CGM.getTarget().hasFeature("sve")) 10816 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10817 MangledName, 's', 128, Fn, ExprLoc); 10818 if (CGM.getTarget().hasFeature("neon")) 10819 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10820 MangledName, 'n', 128, Fn, ExprLoc); 10821 } 10822 } 10823 FD = FD->getPreviousDecl(); 10824 } 10825 } 10826 10827 namespace { 10828 /// Cleanup action for doacross support. 10829 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10830 public: 10831 static const int DoacrossFinArgs = 2; 10832 10833 private: 10834 llvm::FunctionCallee RTLFn; 10835 llvm::Value *Args[DoacrossFinArgs]; 10836 10837 public: 10838 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10839 ArrayRef<llvm::Value *> CallArgs) 10840 : RTLFn(RTLFn) { 10841 assert(CallArgs.size() == DoacrossFinArgs); 10842 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10843 } 10844 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10845 if (!CGF.HaveInsertPoint()) 10846 return; 10847 CGF.EmitRuntimeCall(RTLFn, Args); 10848 } 10849 }; 10850 } // namespace 10851 10852 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10853 const OMPLoopDirective &D, 10854 ArrayRef<Expr *> NumIterations) { 10855 if (!CGF.HaveInsertPoint()) 10856 return; 10857 10858 ASTContext &C = CGM.getContext(); 10859 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10860 RecordDecl *RD; 10861 if (KmpDimTy.isNull()) { 10862 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 10863 // kmp_int64 lo; // lower 10864 // kmp_int64 up; // upper 10865 // 
kmp_int64 st; // stride 10866 // }; 10867 RD = C.buildImplicitRecord("kmp_dim"); 10868 RD->startDefinition(); 10869 addFieldToRecordDecl(C, RD, Int64Ty); 10870 addFieldToRecordDecl(C, RD, Int64Ty); 10871 addFieldToRecordDecl(C, RD, Int64Ty); 10872 RD->completeDefinition(); 10873 KmpDimTy = C.getRecordType(RD); 10874 } else { 10875 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10876 } 10877 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10878 QualType ArrayTy = 10879 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 10880 10881 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10882 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10883 enum { LowerFD = 0, UpperFD, StrideFD }; 10884 // Fill dims with data. 10885 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10886 LValue DimsLVal = CGF.MakeAddrLValue( 10887 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10888 // dims.upper = num_iterations; 10889 LValue UpperLVal = CGF.EmitLValueForField( 10890 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10891 llvm::Value *NumIterVal = 10892 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10893 D.getNumIterations()->getType(), Int64Ty, 10894 D.getNumIterations()->getExprLoc()); 10895 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10896 // dims.stride = 1; 10897 LValue StrideLVal = CGF.EmitLValueForField( 10898 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10899 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10900 StrideLVal); 10901 } 10902 10903 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10904 // kmp_int32 num_dims, struct kmp_dim * dims); 10905 llvm::Value *Args[] = { 10906 emitUpdateLocation(CGF, D.getBeginLoc()), 10907 getThreadID(CGF, D.getBeginLoc()), 10908 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10909 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10910 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 
10911 CGM.VoidPtrTy)}; 10912 10913 llvm::FunctionCallee RTLFn = 10914 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10915 CGF.EmitRuntimeCall(RTLFn, Args); 10916 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10917 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10918 llvm::FunctionCallee FiniRTLFn = 10919 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10920 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10921 llvm::makeArrayRef(FiniArgs)); 10922 } 10923 10924 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10925 const OMPDependClause *C) { 10926 QualType Int64Ty = 10927 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10928 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10929 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10930 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 10931 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10932 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10933 const Expr *CounterVal = C->getLoopData(I); 10934 assert(CounterVal); 10935 llvm::Value *CntVal = CGF.EmitScalarConversion( 10936 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10937 CounterVal->getExprLoc()); 10938 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10939 /*Volatile=*/false, Int64Ty); 10940 } 10941 llvm::Value *Args[] = { 10942 emitUpdateLocation(CGF, C->getBeginLoc()), 10943 getThreadID(CGF, C->getBeginLoc()), 10944 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10945 llvm::FunctionCallee RTLFn; 10946 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10947 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10948 } else { 10949 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10950 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10951 } 10952 CGF.EmitRuntimeCall(RTLFn, Args); 10953 } 10954 10955 void CGOpenMPRuntime::emitCall(CodeGenFunction 
&CGF, SourceLocation Loc, 10956 llvm::FunctionCallee Callee, 10957 ArrayRef<llvm::Value *> Args) const { 10958 assert(Loc.isValid() && "Outlined function call location must be valid."); 10959 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10960 10961 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10962 if (Fn->doesNotThrow()) { 10963 CGF.EmitNounwindRuntimeCall(Fn, Args); 10964 return; 10965 } 10966 } 10967 CGF.EmitRuntimeCall(Callee, Args); 10968 } 10969 10970 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10971 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10972 ArrayRef<llvm::Value *> Args) const { 10973 emitCall(CGF, Loc, OutlinedFn, Args); 10974 } 10975 10976 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 10977 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 10978 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 10979 HasEmittedDeclareTargetRegion = true; 10980 } 10981 10982 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10983 const VarDecl *NativeParam, 10984 const VarDecl *TargetParam) const { 10985 return CGF.GetAddrOfLocalVar(NativeParam); 10986 } 10987 10988 namespace { 10989 /// Cleanup action for allocate support. 
10990 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10991 public: 10992 static const int CleanupArgs = 3; 10993 10994 private: 10995 llvm::FunctionCallee RTLFn; 10996 llvm::Value *Args[CleanupArgs]; 10997 10998 public: 10999 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11000 ArrayRef<llvm::Value *> CallArgs) 11001 : RTLFn(RTLFn) { 11002 assert(CallArgs.size() == CleanupArgs && 11003 "Size of arguments does not match."); 11004 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11005 } 11006 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11007 if (!CGF.HaveInsertPoint()) 11008 return; 11009 CGF.EmitRuntimeCall(RTLFn, Args); 11010 } 11011 }; 11012 } // namespace 11013 11014 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11015 const VarDecl *VD) { 11016 if (!VD) 11017 return Address::invalid(); 11018 const VarDecl *CVD = VD->getCanonicalDecl(); 11019 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11020 return Address::invalid(); 11021 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11022 // Use the default allocation. 
11023 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 11024 !AA->getAllocator()) 11025 return Address::invalid(); 11026 llvm::Value *Size; 11027 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11028 if (CVD->getType()->isVariablyModifiedType()) { 11029 Size = CGF.getTypeSize(CVD->getType()); 11030 // Align the size: ((size + align - 1) / align) * align 11031 Size = CGF.Builder.CreateNUWAdd( 11032 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11033 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11034 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11035 } else { 11036 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11037 Size = CGM.getSize(Sz.alignTo(Align)); 11038 } 11039 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11040 assert(AA->getAllocator() && 11041 "Expected allocator expression for non-default allocator."); 11042 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11043 // According to the standard, the original allocator type is a enum (integer). 11044 // Convert to pointer type, if required. 
11045 if (Allocator->getType()->isIntegerTy()) 11046 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11047 else if (Allocator->getType()->isPointerTy()) 11048 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11049 CGM.VoidPtrTy); 11050 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11051 11052 llvm::Value *Addr = 11053 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11054 getName({CVD->getName(), ".void.addr"})); 11055 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11056 Allocator}; 11057 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11058 11059 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11060 llvm::makeArrayRef(FiniArgs)); 11061 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11062 Addr, 11063 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11064 getName({CVD->getName(), ".addr"})); 11065 return Address(Addr, Align); 11066 } 11067 11068 /// Finds the variant function that matches current context with its context 11069 /// selector. 11070 static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM, 11071 const FunctionDecl *FD) { 11072 if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) 11073 return FD; 11074 11075 SmallVector<Expr *, 8> VariantExprs; 11076 SmallVector<VariantMatchInfo, 8> VMIs; 11077 for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { 11078 const OMPTraitInfo *TI = A->getTraitInfos(); 11079 if (!TI) 11080 continue; 11081 VMIs.push_back(VariantMatchInfo()); 11082 TI->getAsVariantMatchInfo(CGM.getContext(), VMIs.back()); 11083 VariantExprs.push_back(A->getVariantFuncRef()); 11084 } 11085 11086 OMPContext Ctx(CGM.getLangOpts().OpenMPIsDevice, CGM.getTriple()); 11087 // FIXME: Keep the context in the OMPIRBuilder so we can add constructs as we 11088 // build them. 
11089 11090 int BestMatchIdx = getBestVariantMatchForContext(VMIs, Ctx); 11091 if (BestMatchIdx < 0) 11092 return FD; 11093 11094 return cast<FunctionDecl>( 11095 cast<DeclRefExpr>(VariantExprs[BestMatchIdx]->IgnoreParenImpCasts()) 11096 ->getDecl()); 11097 } 11098 11099 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { 11100 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11101 // If the original function is defined already, use its definition. 11102 StringRef MangledName = CGM.getMangledName(GD); 11103 llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); 11104 if (Orig && !Orig->isDeclaration()) 11105 return false; 11106 const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D); 11107 // Emit original function if it does not have declare variant attribute or the 11108 // context does not match. 11109 if (NewFD == D) 11110 return false; 11111 GlobalDecl NewGD = GD.getWithDecl(NewFD); 11112 if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { 11113 DeferredVariantFunction.erase(D); 11114 return true; 11115 } 11116 DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); 11117 return true; 11118 } 11119 11120 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11121 CodeGenModule &CGM, const OMPLoopDirective &S) 11122 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11123 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11124 if (!NeedToPush) 11125 return; 11126 NontemporalDeclsSet &DS = 11127 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11128 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11129 for (const Stmt *Ref : C->private_refs()) { 11130 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11131 const ValueDecl *VD; 11132 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11133 VD = DRE->getDecl(); 11134 } else { 11135 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11136 
assert((ME->isImplicitCXXThis() || 11137 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11138 "Expected member of current class."); 11139 VD = ME->getMemberDecl(); 11140 } 11141 DS.insert(VD); 11142 } 11143 } 11144 } 11145 11146 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11147 if (!NeedToPush) 11148 return; 11149 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11150 } 11151 11152 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11153 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11154 11155 return llvm::any_of( 11156 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11157 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11158 } 11159 11160 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11161 const OMPExecutableDirective &S, 11162 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11163 const { 11164 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11165 // Vars in target/task regions must be excluded completely. 11166 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11167 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11168 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11169 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11170 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11171 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11172 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11173 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11174 } 11175 } 11176 // Exclude vars in private clauses. 
11177 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11178 for (const Expr *Ref : C->varlists()) { 11179 if (!Ref->getType()->isScalarType()) 11180 continue; 11181 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11182 if (!DRE) 11183 continue; 11184 NeedToCheckForLPCs.insert(DRE->getDecl()); 11185 } 11186 } 11187 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11188 for (const Expr *Ref : C->varlists()) { 11189 if (!Ref->getType()->isScalarType()) 11190 continue; 11191 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11192 if (!DRE) 11193 continue; 11194 NeedToCheckForLPCs.insert(DRE->getDecl()); 11195 } 11196 } 11197 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11198 for (const Expr *Ref : C->varlists()) { 11199 if (!Ref->getType()->isScalarType()) 11200 continue; 11201 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11202 if (!DRE) 11203 continue; 11204 NeedToCheckForLPCs.insert(DRE->getDecl()); 11205 } 11206 } 11207 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11208 for (const Expr *Ref : C->varlists()) { 11209 if (!Ref->getType()->isScalarType()) 11210 continue; 11211 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11212 if (!DRE) 11213 continue; 11214 NeedToCheckForLPCs.insert(DRE->getDecl()); 11215 } 11216 } 11217 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11218 for (const Expr *Ref : C->varlists()) { 11219 if (!Ref->getType()->isScalarType()) 11220 continue; 11221 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11222 if (!DRE) 11223 continue; 11224 NeedToCheckForLPCs.insert(DRE->getDecl()); 11225 } 11226 } 11227 for (const Decl *VD : NeedToCheckForLPCs) { 11228 for (const LastprivateConditionalData &Data : 11229 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11230 if (Data.DeclToUniqueName.count(VD) > 0) { 11231 if (!Data.Disabled) 11232 
NeedToAddForLPCsAsDisabled.insert(VD); 11233 break; 11234 } 11235 } 11236 } 11237 } 11238 11239 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11240 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11241 : CGM(CGF.CGM), 11242 Action((CGM.getLangOpts().OpenMP >= 50 && 11243 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11244 [](const OMPLastprivateClause *C) { 11245 return C->getKind() == 11246 OMPC_LASTPRIVATE_conditional; 11247 })) 11248 ? ActionToDo::PushAsLastprivateConditional 11249 : ActionToDo::DoNotPush) { 11250 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11251 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11252 return; 11253 assert(Action == ActionToDo::PushAsLastprivateConditional && 11254 "Expected a push action."); 11255 LastprivateConditionalData &Data = 11256 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11257 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11258 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11259 continue; 11260 11261 for (const Expr *Ref : C->varlists()) { 11262 Data.DeclToUniqueName.insert(std::make_pair( 11263 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11264 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11265 } 11266 } 11267 Data.IVLVal = IVLVal; 11268 Data.Fn = CGF.CurFn; 11269 } 11270 11271 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11272 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11273 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11274 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11275 if (CGM.getLangOpts().OpenMP < 50) 11276 return; 11277 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11278 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11279 if (!NeedToAddForLPCsAsDisabled.empty()) { 11280 Action = ActionToDo::DisableLastprivateConditional; 11281 
LastprivateConditionalData &Data = 11282 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11283 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11284 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11285 Data.Fn = CGF.CurFn; 11286 Data.Disabled = true; 11287 } 11288 } 11289 11290 CGOpenMPRuntime::LastprivateConditionalRAII 11291 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11292 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11293 return LastprivateConditionalRAII(CGF, S); 11294 } 11295 11296 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11297 if (CGM.getLangOpts().OpenMP < 50) 11298 return; 11299 if (Action == ActionToDo::DisableLastprivateConditional) { 11300 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11301 "Expected list of disabled private vars."); 11302 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11303 } 11304 if (Action == ActionToDo::PushAsLastprivateConditional) { 11305 assert( 11306 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11307 "Expected list of lastprivate conditional vars."); 11308 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11309 } 11310 } 11311 11312 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11313 const VarDecl *VD) { 11314 ASTContext &C = CGM.getContext(); 11315 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11316 if (I == LastprivateConditionalToTypes.end()) 11317 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11318 QualType NewType; 11319 const FieldDecl *VDField; 11320 const FieldDecl *FiredField; 11321 LValue BaseLVal; 11322 auto VI = I->getSecond().find(VD); 11323 if (VI == I->getSecond().end()) { 11324 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 11325 RD->startDefinition(); 11326 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11327 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 11328 RD->completeDefinition(); 11329 NewType = C.getRecordType(RD); 11330 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11331 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11332 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11333 } else { 11334 NewType = std::get<0>(VI->getSecond()); 11335 VDField = std::get<1>(VI->getSecond()); 11336 FiredField = std::get<2>(VI->getSecond()); 11337 BaseLVal = std::get<3>(VI->getSecond()); 11338 } 11339 LValue FiredLVal = 11340 CGF.EmitLValueForField(BaseLVal, FiredField); 11341 CGF.EmitStoreOfScalar( 11342 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11343 FiredLVal); 11344 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11345 } 11346 11347 namespace { 11348 /// Checks if the lastprivate conditional variable is referenced in LHS. 11349 class LastprivateConditionalRefChecker final 11350 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11351 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11352 const Expr *FoundE = nullptr; 11353 const Decl *FoundD = nullptr; 11354 StringRef UniqueDeclName; 11355 LValue IVLVal; 11356 llvm::Function *FoundFn = nullptr; 11357 SourceLocation Loc; 11358 11359 public: 11360 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11361 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11362 llvm::reverse(LPM)) { 11363 auto It = D.DeclToUniqueName.find(E->getDecl()); 11364 if (It == D.DeclToUniqueName.end()) 11365 continue; 11366 if (D.Disabled) 11367 return false; 11368 FoundE = E; 11369 FoundD = E->getDecl()->getCanonicalDecl(); 11370 UniqueDeclName = It->second; 11371 IVLVal = D.IVLVal; 11372 FoundFn = D.Fn; 11373 break; 11374 } 11375 return FoundE == E; 11376 } 11377 bool VisitMemberExpr(const MemberExpr *E) { 11378 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11379 return false; 11380 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 11381 llvm::reverse(LPM)) { 11382 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11383 if (It == D.DeclToUniqueName.end()) 11384 continue; 11385 if (D.Disabled) 11386 return false; 11387 FoundE = E; 11388 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11389 UniqueDeclName = It->second; 11390 IVLVal = D.IVLVal; 11391 FoundFn = D.Fn; 11392 break; 11393 } 11394 return FoundE == E; 11395 } 11396 bool VisitStmt(const Stmt *S) { 11397 for (const Stmt *Child : S->children()) { 11398 if (!Child) 11399 continue; 11400 if (const auto *E = dyn_cast<Expr>(Child)) 11401 if (!E->isGLValue()) 11402 continue; 11403 if (Visit(Child)) 11404 return true; 11405 } 11406 return false; 11407 } 11408 explicit LastprivateConditionalRefChecker( 11409 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11410 : LPM(LPM) {} 11411 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11412 getFoundData() const { 11413 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11414 } 11415 }; 11416 } // namespace 11417 11418 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11419 LValue IVLVal, 11420 StringRef UniqueDeclName, 11421 LValue LVal, 11422 SourceLocation Loc) { 11423 // Last updated loop counter for the lastprivate conditional var. 11424 // int<xx> last_iv = 0; 11425 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11426 llvm::Constant *LastIV = 11427 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 11428 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11429 IVLVal.getAlignment().getAsAlign()); 11430 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11431 11432 // Last value of the lastprivate conditional. 
11433 // decltype(priv_a) last_a; 11434 llvm::Constant *Last = getOrCreateInternalVariable( 11435 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 11436 cast<llvm::GlobalVariable>(Last)->setAlignment( 11437 LVal.getAlignment().getAsAlign()); 11438 LValue LastLVal = 11439 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 11440 11441 // Global loop counter. Required to handle inner parallel-for regions. 11442 // iv 11443 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 11444 11445 // #pragma omp critical(a) 11446 // if (last_iv <= iv) { 11447 // last_iv = iv; 11448 // last_a = priv_a; 11449 // } 11450 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 11451 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 11452 Action.Enter(CGF); 11453 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 11454 // (last_iv <= iv) ? Check if the variable is updated and store new 11455 // value in global var. 11456 llvm::Value *CmpRes; 11457 if (IVLVal.getType()->isSignedIntegerType()) { 11458 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 11459 } else { 11460 assert(IVLVal.getType()->isUnsignedIntegerType() && 11461 "Loop iteration variable must be integer."); 11462 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 11463 } 11464 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 11465 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 11466 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 11467 // { 11468 CGF.EmitBlock(ThenBB); 11469 11470 // last_iv = iv; 11471 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 11472 11473 // last_a = priv_a; 11474 switch (CGF.getEvaluationKind(LVal.getType())) { 11475 case TEK_Scalar: { 11476 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 11477 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 11478 break; 11479 } 11480 case TEK_Complex: { 11481 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 11482 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 11483 break; 11484 } 11485 case TEK_Aggregate: 11486 llvm_unreachable( 11487 "Aggregates are not supported in lastprivate conditional."); 11488 } 11489 // } 11490 CGF.EmitBranch(ExitBB); 11491 // There is no need to emit line number for unconditional branch. 11492 (void)ApplyDebugLocation::CreateEmpty(CGF); 11493 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 11494 }; 11495 11496 if (CGM.getLangOpts().OpenMPSimd) { 11497 // Do not emit as a critical region as no parallel region could be emitted. 11498 RegionCodeGenTy ThenRCG(CodeGen); 11499 ThenRCG(CGF); 11500 } else { 11501 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 11502 } 11503 } 11504 11505 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 11506 const Expr *LHS) { 11507 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11508 return; 11509 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 11510 if (!Checker.Visit(LHS)) 11511 return; 11512 const Expr *FoundE; 11513 const Decl *FoundD; 11514 StringRef UniqueDeclName; 11515 LValue IVLVal; 11516 llvm::Function *FoundFn; 11517 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 11518 Checker.getFoundData(); 11519 if (FoundFn != CGF.CurFn) { 11520 // Special codegen for inner parallel regions. 
11521 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 11522 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 11523 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 11524 "Lastprivate conditional is not found in outer region."); 11525 QualType StructTy = std::get<0>(It->getSecond()); 11526 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 11527 LValue PrivLVal = CGF.EmitLValue(FoundE); 11528 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11529 PrivLVal.getAddress(CGF), 11530 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 11531 LValue BaseLVal = 11532 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 11533 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 11534 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 11535 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 11536 FiredLVal, llvm::AtomicOrdering::Unordered, 11537 /*IsVolatile=*/true, /*isInit=*/false); 11538 return; 11539 } 11540 11541 // Private address of the lastprivate conditional in the current context. 
11542 // priv_a 11543 LValue LVal = CGF.EmitLValue(FoundE); 11544 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 11545 FoundE->getExprLoc()); 11546 } 11547 11548 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 11549 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11550 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 11551 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11552 return; 11553 auto Range = llvm::reverse(LastprivateConditionalStack); 11554 auto It = llvm::find_if( 11555 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 11556 if (It == Range.end() || It->Fn != CGF.CurFn) 11557 return; 11558 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 11559 assert(LPCI != LastprivateConditionalToTypes.end() && 11560 "Lastprivates must be registered already."); 11561 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11562 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 11563 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 11564 for (const auto &Pair : It->DeclToUniqueName) { 11565 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 11566 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 11567 continue; 11568 auto I = LPCI->getSecond().find(Pair.first); 11569 assert(I != LPCI->getSecond().end() && 11570 "Lastprivate must be rehistered already."); 11571 // bool Cmp = priv_a.Fired != 0; 11572 LValue BaseLVal = std::get<3>(I->getSecond()); 11573 LValue FiredLVal = 11574 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 11575 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 11576 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 11577 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 11578 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 11579 // if (Cmp) { 11580 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 11581 CGF.EmitBlock(ThenBB); 
11582 Address Addr = CGF.GetAddrOfLocalVar(VD); 11583 LValue LVal; 11584 if (VD->getType()->isReferenceType()) 11585 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 11586 AlignmentSource::Decl); 11587 else 11588 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 11589 AlignmentSource::Decl); 11590 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 11591 D.getBeginLoc()); 11592 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 11593 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 11594 // } 11595 } 11596 } 11597 11598 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11599 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11600 SourceLocation Loc) { 11601 if (CGF.getLangOpts().OpenMP < 50) 11602 return; 11603 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 11604 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 11605 "Unknown lastprivate conditional variable."); 11606 StringRef UniqueName = It->second; 11607 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11608 // The variable was not updated in the region - exit. 
11609 if (!GV) 11610 return; 11611 LValue LPLVal = CGF.MakeAddrLValue( 11612 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 11613 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 11614 CGF.EmitStoreOfScalar(Res, PrivLVal); 11615 } 11616 11617 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11618 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11619 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11620 llvm_unreachable("Not supported in SIMD-only mode"); 11621 } 11622 11623 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11624 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11625 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11626 llvm_unreachable("Not supported in SIMD-only mode"); 11627 } 11628 11629 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11630 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11631 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11632 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11633 bool Tied, unsigned &NumberOfParts) { 11634 llvm_unreachable("Not supported in SIMD-only mode"); 11635 } 11636 11637 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11638 SourceLocation Loc, 11639 llvm::Function *OutlinedFn, 11640 ArrayRef<llvm::Value *> CapturedVars, 11641 const Expr *IfCond) { 11642 llvm_unreachable("Not supported in SIMD-only mode"); 11643 } 11644 11645 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11646 CodeGenFunction &CGF, StringRef CriticalName, 11647 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11648 const Expr *Hint) { 11649 llvm_unreachable("Not supported in SIMD-only mode"); 11650 } 11651 11652 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11653 const RegionCodeGenTy &MasterOpGen, 11654 SourceLocation Loc) { 11655 llvm_unreachable("Not supported in SIMD-only mode"); 11656 } 11657 11658 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11659 SourceLocation Loc) { 11660 llvm_unreachable("Not supported in SIMD-only mode"); 11661 } 11662 11663 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11664 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11665 SourceLocation Loc) { 11666 llvm_unreachable("Not supported in SIMD-only mode"); 11667 } 11668 11669 void CGOpenMPSIMDRuntime::emitSingleRegion( 11670 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11671 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11672 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11673 ArrayRef<const Expr *> AssignmentOps) { 11674 llvm_unreachable("Not supported in SIMD-only mode"); 11675 } 11676 11677 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11678 const RegionCodeGenTy &OrderedOpGen, 11679 SourceLocation Loc, 11680 bool IsThreads) { 11681 llvm_unreachable("Not supported in SIMD-only mode"); 11682 } 11683 11684 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11685 SourceLocation Loc, 11686 OpenMPDirectiveKind Kind, 11687 bool EmitChecks, 11688 bool ForceSimpleCall) { 11689 llvm_unreachable("Not supported in SIMD-only mode"); 11690 } 11691 11692 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11693 CodeGenFunction &CGF, SourceLocation Loc, 11694 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11695 bool Ordered, const DispatchRTInput &DispatchValues) { 11696 llvm_unreachable("Not supported in SIMD-only mode"); 11697 } 11698 11699 void CGOpenMPSIMDRuntime::emitForStaticInit( 11700 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11701 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11702 llvm_unreachable("Not supported in SIMD-only mode"); 11703 } 11704 11705 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11706 CodeGenFunction &CGF, SourceLocation Loc, 11707 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 11708 llvm_unreachable("Not supported in SIMD-only mode"); 11709 } 11710 11711 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11712 SourceLocation Loc, 11713 unsigned IVSize, 11714 bool IVSigned) { 11715 llvm_unreachable("Not supported in SIMD-only mode"); 11716 } 11717 11718 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11719 SourceLocation Loc, 11720 OpenMPDirectiveKind DKind) { 11721 llvm_unreachable("Not supported in SIMD-only mode"); 11722 } 11723 11724 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11725 SourceLocation Loc, 11726 unsigned IVSize, bool IVSigned, 11727 Address IL, Address LB, 11728 Address UB, Address ST) { 11729 llvm_unreachable("Not supported in SIMD-only mode"); 11730 } 11731 11732 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11733 llvm::Value *NumThreads, 11734 SourceLocation Loc) { 11735 llvm_unreachable("Not supported in SIMD-only mode"); 11736 } 11737 11738 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11739 ProcBindKind ProcBind, 11740 SourceLocation Loc) { 11741 llvm_unreachable("Not supported in SIMD-only mode"); 11742 } 11743 11744 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11745 const VarDecl *VD, 11746 Address VDAddr, 11747 SourceLocation Loc) { 11748 llvm_unreachable("Not supported in SIMD-only mode"); 11749 } 11750 11751 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11752 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11753 CodeGenFunction *CGF) { 11754 llvm_unreachable("Not supported in SIMD-only mode"); 11755 } 11756 11757 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11758 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11759 llvm_unreachable("Not supported in SIMD-only mode"); 11760 } 11761 11762 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11763 ArrayRef<const Expr *> 
Vars, 11764 SourceLocation Loc, 11765 llvm::AtomicOrdering AO) { 11766 llvm_unreachable("Not supported in SIMD-only mode"); 11767 } 11768 11769 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11770 const OMPExecutableDirective &D, 11771 llvm::Function *TaskFunction, 11772 QualType SharedsTy, Address Shareds, 11773 const Expr *IfCond, 11774 const OMPTaskDataTy &Data) { 11775 llvm_unreachable("Not supported in SIMD-only mode"); 11776 } 11777 11778 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11779 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11780 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11781 const Expr *IfCond, const OMPTaskDataTy &Data) { 11782 llvm_unreachable("Not supported in SIMD-only mode"); 11783 } 11784 11785 void CGOpenMPSIMDRuntime::emitReduction( 11786 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11787 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11788 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11789 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11790 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11791 ReductionOps, Options); 11792 } 11793 11794 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11795 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11796 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11797 llvm_unreachable("Not supported in SIMD-only mode"); 11798 } 11799 11800 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11801 SourceLocation Loc, 11802 ReductionCodeGen &RCG, 11803 unsigned N) { 11804 llvm_unreachable("Not supported in SIMD-only mode"); 11805 } 11806 11807 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11808 SourceLocation Loc, 11809 llvm::Value *ReductionsPtr, 11810 LValue SharedLVal) { 11811 llvm_unreachable("Not supported in 
SIMD-only mode"); 11812 } 11813 11814 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11815 SourceLocation Loc) { 11816 llvm_unreachable("Not supported in SIMD-only mode"); 11817 } 11818 11819 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11820 CodeGenFunction &CGF, SourceLocation Loc, 11821 OpenMPDirectiveKind CancelRegion) { 11822 llvm_unreachable("Not supported in SIMD-only mode"); 11823 } 11824 11825 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11826 SourceLocation Loc, const Expr *IfCond, 11827 OpenMPDirectiveKind CancelRegion) { 11828 llvm_unreachable("Not supported in SIMD-only mode"); 11829 } 11830 11831 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11832 const OMPExecutableDirective &D, StringRef ParentName, 11833 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11834 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11835 llvm_unreachable("Not supported in SIMD-only mode"); 11836 } 11837 11838 void CGOpenMPSIMDRuntime::emitTargetCall( 11839 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11840 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 11841 const Expr *Device, 11842 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 11843 const OMPLoopDirective &D)> 11844 SizeEmitter) { 11845 llvm_unreachable("Not supported in SIMD-only mode"); 11846 } 11847 11848 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 11849 llvm_unreachable("Not supported in SIMD-only mode"); 11850 } 11851 11852 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 11853 llvm_unreachable("Not supported in SIMD-only mode"); 11854 } 11855 11856 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 11857 return false; 11858 } 11859 11860 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 11861 const OMPExecutableDirective &D, 11862 SourceLocation Loc, 11863 llvm::Function *OutlinedFn, 11864 ArrayRef<llvm::Value *> CapturedVars) { 11865 
llvm_unreachable("Not supported in SIMD-only mode"); 11866 } 11867 11868 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11869 const Expr *NumTeams, 11870 const Expr *ThreadLimit, 11871 SourceLocation Loc) { 11872 llvm_unreachable("Not supported in SIMD-only mode"); 11873 } 11874 11875 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 11876 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11877 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11878 llvm_unreachable("Not supported in SIMD-only mode"); 11879 } 11880 11881 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 11882 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11883 const Expr *Device) { 11884 llvm_unreachable("Not supported in SIMD-only mode"); 11885 } 11886 11887 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11888 const OMPLoopDirective &D, 11889 ArrayRef<Expr *> NumIterations) { 11890 llvm_unreachable("Not supported in SIMD-only mode"); 11891 } 11892 11893 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11894 const OMPDependClause *C) { 11895 llvm_unreachable("Not supported in SIMD-only mode"); 11896 } 11897 11898 const VarDecl * 11899 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 11900 const VarDecl *NativeParam) const { 11901 llvm_unreachable("Not supported in SIMD-only mode"); 11902 } 11903 11904 Address 11905 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 11906 const VarDecl *NativeParam, 11907 const VarDecl *TargetParam) const { 11908 llvm_unreachable("Not supported in SIMD-only mode"); 11909 } 11910