//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Construct region info bound to a captured statement \p CS (used by the
  /// outlined-region kinds, which own a CapturedStmt).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Construct region info without a captured statement (used for inlined
  /// regions, which reuse the enclosing function's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting the dispatch point of an untied task; no-op except in
  /// task regions (see CGOpenMPTaskOutlinedRegionInfo).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Which of the CGOpenMPRegionKind variants this region is.
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the body of the region.
  RegionCodeGenTy CodeGen;
  /// The OpenMP directive this region was created for.
  OpenMPDirectiveKind Kind;
  /// True if a 'cancel' directive may appear inside this region.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action that builds the re-entry dispatch for untied tasks: a
  /// switch over the task part-id that jumps to the statement after the last
  /// executed task scheduling point.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Variable holding the current part id of the untied task.
    const VarDecl *PartIDVar;
    /// Extra codegen to run at each scheduling point (e.g. task switch code).
    const RegionCodeGenTy UntiedCodeGen;
    /// The dispatch switch; each case is one re-entry point.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    /// On entry to an untied task body: load the part id and emit the
    /// dispatch switch; case 0 falls through to the first part.
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task scheduling point: store the next part id, run the
    /// untied codegen, return from the task, and register the block that
    /// follows as a new switch case (the resume point).
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The next part id is the current number of cases; the matching case
        // is added below once the resume block exists.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (one switch case per part).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
243 llvm::Value *getContextValue() const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->getContextValue(); 246 llvm_unreachable("No context value for inlined OpenMP region"); 247 } 248 249 void setContextValue(llvm::Value *V) override { 250 if (OuterRegionInfo) { 251 OuterRegionInfo->setContextValue(V); 252 return; 253 } 254 llvm_unreachable("No context value for inlined OpenMP region"); 255 } 256 257 /// Lookup the captured field decl for a variable. 258 const FieldDecl *lookup(const VarDecl *VD) const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->lookup(VD); 261 // If there is no outer outlined region,no need to lookup in a list of 262 // captured variables, we can use the original one. 263 return nullptr; 264 } 265 266 FieldDecl *getThisFieldDecl() const override { 267 if (OuterRegionInfo) 268 return OuterRegionInfo->getThisFieldDecl(); 269 return nullptr; 270 } 271 272 /// Get a variable or parameter for storing global thread id 273 /// inside OpenMP construct. 274 const VarDecl *getThreadIDVariable() const override { 275 if (OuterRegionInfo) 276 return OuterRegionInfo->getThreadIDVariable(); 277 return nullptr; 278 } 279 280 /// Get an LValue for the current ThreadID variable. 281 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 282 if (OuterRegionInfo) 283 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 284 llvm_unreachable("No LValue for inlined OpenMP construct"); 285 } 286 287 /// Get the name of the capture helper. 
  StringRef getHelperName() const override {
    // NOTE: this deliberately consults the raw OldCSI (any enclosing captured
    // statement info), not the OuterRegionInfo member, which is non-null only
    // when the outer info is itself an OpenMP region.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null if the outer info is
  /// not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique name of the outlined target helper, supplied by the client.
  StringRef HelperName;
};

/// Placeholder codegen callback: expression regions must never emit a body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need the
      // privatization treatment.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  // NOTE(review): currently equivalent to the inherited lookup — kept as an
  // explicit override for clarity/extension.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  // Saved CodeGenFunction state, restored in the destructor.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct: install a fresh inlined-region info
    // and clear lambda/block capture state so the region does not see it.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// IDs of the OpenMP runtime library entry points (libomp '__kmpc_*' and
/// offloading '__tgt_*' routines) emitted by this code generator. Each
/// enumerator's comment gives the C prototype of the corresponding call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Invoke the stored codegen callback inside a fresh cleanups scope. When a
/// pre/post action is attached, its Exit hook is registered as a cleanup that
/// runs on both normal and exceptional exits from the region.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // A UDR combiner is modeled as CallExpr(OpaqueValueExpr -> DeclRefExpr ->
  // OMPDeclareReductionDecl); peel those layers off, or return nullptr.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit the initializer for one reduction item using the 'declare reduction'
/// construct \p DRD: either evaluate the user-provided initializer expression
/// \p InitOp with omp_priv/omp_orig mapped to \p Private / \p Original, or
/// default-initialize \p Private with a null constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // The initializer is a call expression; rewire its two address-of
    // arguments to the private/original storage and re-emit it.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Reduction.second is the emitted initializer function.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: materialize a zero-initialized private global of
    // type Ty and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  // SrcBegin/SrcAddr are only needed when a UDR initializer reads omp_orig.
  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope per element so element-level temporaries are cleaned each
    // iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name below says "dest.element" for the
    // *source* GEP — looks like a copy-paste; cosmetic (IR names only).
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Upper-bound lvalue for an array section; invalid LValue otherwise.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ?
ClausesData[N].ReductionOp
                           : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  // One ClausesData entry per reduction item; the three arrays are parallel.
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  // Items must be emitted in order so the index matches the vector size.
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: size in chars is known, no element count needed.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; size in chars = count * sizeof(elem).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: total size is known, derive the element count from it.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed count before emitting the
  // variably-modified type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Same as the one-argument overload, but with a caller-provided count.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  // Recast the shared lvalue to the shared type, keeping base/TBAA info.
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays: element-by-element initialization.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a UDR initializer.
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private VarDecl's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  // needCleanups(N) recomputes DTorKind != DK_none for the same item.
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Dereference pointers/references in \p BaseLV until its type matches
/// \p ElTy, then return an lvalue recast to ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
!CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Rebuild the pointer/reference chain of \p BaseTy around the raw private
/// pointer \p Addr: allocate a temporary per indirection level, chain them by
/// stores, and return the outermost address (or \p Addr itself if BaseTy has
/// no indirections).
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();      // innermost temporary
  Address TopTmp = Address::invalid();   // previous level, stores next level
  Address MostTopTmp = Address::invalid(); // outermost temporary (returned)
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Strip array-section/subscript layers from \p Ref down to the underlying
/// DeclRefExpr (returned via \p DE) and its VarDecl; nullptr if \p Ref is
/// neither an array section nor an array subscript.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    // Array section/subscript: offset the private pointer by the distance
    // between the section start and the base variable's begin.
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: no adjustment needed.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  // The thread-id variable is a pointer here; load through it.
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  // For tasks the thread id is stored by value, not behind a pointer.
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Append a public, non-bitfield field of type \p FieldTy to record \p DC.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  // Build the implicit ident_t record used for source-location arguments to
  // the OpenMP runtime.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

/// Try to emit an alias from the old (variant) mangled name to the new
/// function selected by 'declare variant'. Returns true on success; false if
/// the aliasee has no definition yet (caller may retry or emit the original
/// declaration).
bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD,
                                            const GlobalDecl &OldGD,
                                            llvm::GlobalValue *OrigAddr,
                                            bool IsForDefinition) {
  // Emit at least a definition for the aliasee if the address of the
  // original function is requested.
  if (IsForDefinition || OrigAddr)
    (void)CGM.GetAddrOfGlobal(NewGD);
  StringRef NewMangledName = CGM.getMangledName(NewGD);
  llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName);
  if (Addr && !Addr->isDeclaration()) {
    const auto *D = cast<FunctionDecl>(OldGD.getDecl());
    const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD);
    llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI);

    // Create a reference to the named value. This ensures that it is emitted
    // if a deferred decl.
    llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD);

    // Create the new alias itself, but don't set a name yet.
    auto *GA =
        llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule());

    if (OrigAddr) {
      // Replace the previously-emitted declaration with the alias.
      assert(OrigAddr->isDeclaration() && "Expected declaration");

      GA->takeName(OrigAddr);
      OrigAddr->replaceAllUsesWith(
          llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType()));
      OrigAddr->eraseFromParent();
    } else {
      GA->setName(CGM.getMangledName(OldGD));
    }

    // Set attributes which are particular to an alias; this is a
    // specialization of the attributes which may be set on a global function.
    if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() ||
        D->isWeakImported())
      GA->setLinkage(llvm::Function::WeakAnyLinkage);

    CGM.SetCommonAttributes(OldGD, GA);
    return true;
  }
  return false;
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Keep definitions and anything still referenced.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
  // Emit aliases for the deferred aliasees.
  for (const auto &Pair : DeferredVariantFunction) {
    StringRef MangledName = CGM.getMangledName(Pair.second.second);
    llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName);
    // If not able to emit alias, just emit original declaration.
    (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr,
                                /*IsForDefinition=*/false);
  }
}

/// Join \p Parts using FirstSeparator before the first part and Separator
/// before the rest (e.g. ".omp.part1.part2").
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

/// Emit an internal function implementing a 'declare reduction' combiner or
/// initializer: void .omp_combiner.(Ty *in, Ty *out) (resp. .omp_initializer.).
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force inlining under optimization.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers without a CallInit expression, run omp_priv's own
  // non-trivial initializer first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Each UDR is emitted at most once per module.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only CallInit passes the initializer expression through; DirectInit/
    // CopyInit rely on omp_priv's VarDecl initializer inside the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember which UDRs were emitted for this function.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  // Emit lazily on first use.
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI(
        {FiniCB, OMPD_parallel, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

/// Outline the body of a 'parallel' or 'teams' region into a function taking
/// the global thread id (\p ThreadIDVar) as its first parameter.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Cancellation support depends on the concrete directive kind.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder();
  PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task at each
  // scheduling point.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // Untied tasks are split into parts; report how many were generated.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Fill \p Fields with \p Data laid out per \p RL, padding skipped LLVM
/// struct slots with null values.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  // NOTE(review): CIBuilder appears unused in this function — candidate for
  // removal.
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx =
RL.getLLVMFieldNo(FD); 1598 // Fill the alignment. 1599 for (unsigned I = PrevIdx; I < Idx; ++I) 1600 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1601 PrevIdx = Idx + 1; 1602 Fields.add(*DI); 1603 ++DI; 1604 } 1605 } 1606 1607 template <class... As> 1608 static llvm::GlobalVariable * 1609 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1610 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1611 As &&... Args) { 1612 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1613 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1614 ConstantInitBuilder CIBuilder(CGM); 1615 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1616 buildStructValue(Fields, CGM, RD, RL, Data); 1617 return Fields.finishAndCreateGlobal( 1618 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1619 std::forward<As>(Args)...); 1620 } 1621 1622 template <typename T> 1623 static void 1624 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1625 ArrayRef<llvm::Constant *> Data, 1626 T &Parent) { 1627 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1628 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1629 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1630 buildStructValue(Fields, CGM, RD, RL, Data); 1631 Fields.finishAndAddTo(Parent); 1632 } 1633 1634 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1635 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1636 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1637 FlagsTy FlagsKey(Flags, Reserved2Flags); 1638 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1639 if (!Entry) { 1640 if (!DefaultOpenMPPSource) { 1641 // Initialize default location for psource field of ident_t structure of 1642 // all ident_t objects. Format is ";file;function;line;column;;". 
1643 // Taken from 1644 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1645 DefaultOpenMPPSource = 1646 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1647 DefaultOpenMPPSource = 1648 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1649 } 1650 1651 llvm::Constant *Data[] = { 1652 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1653 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1654 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1655 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1656 llvm::GlobalValue *DefaultOpenMPLocation = 1657 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1658 llvm::GlobalValue::PrivateLinkage); 1659 DefaultOpenMPLocation->setUnnamedAddr( 1660 llvm::GlobalValue::UnnamedAddr::Global); 1661 1662 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1663 } 1664 return Address(Entry, Align); 1665 } 1666 1667 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1668 bool AtCurrentPoint) { 1669 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1670 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1671 1672 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1673 if (AtCurrentPoint) { 1674 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1675 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1676 } else { 1677 Elem.second.ServiceInsertPt = 1678 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1679 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1680 } 1681 } 1682 1683 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1684 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1685 if (Elem.second.ServiceInsertPt) { 1686 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1687 Elem.second.ServiceInsertPt = nullptr; 1688 Ptr->eraseFromParent(); 1689 } 1690 } 1691 1692 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1693 SourceLocation Loc, 1694 unsigned Flags) { 1695 Flags |= OMP_IDENT_KMPC; 1696 // If no debug info is generated - return global default location. 1697 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1698 Loc.isInvalid()) 1699 return getOrCreateDefaultLocation(Flags).getPointer(); 1700 1701 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1702 1703 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1704 Address LocValue = Address::invalid(); 1705 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1706 if (I != OpenMPLocThreadIDMap.end()) 1707 LocValue = Address(I->second.DebugLoc, Align); 1708 1709 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1710 // GetOpenMPThreadID was called before this routine. 1711 if (!LocValue.isValid()) { 1712 // Generate "ident_t .kmpc_loc.addr;" 1713 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1714 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1715 Elem.second.DebugLoc = AI.getPointer(); 1716 LocValue = AI; 1717 1718 if (!Elem.second.ServiceInsertPt) 1719 setLocThreadIdInsertPt(CGF); 1720 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1721 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1722 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1723 CGF.getTypeSize(IdentQTy)); 1724 } 1725 1726 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1727 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1728 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1729 LValue PSource = 1730 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1731 1732 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1733 if (OMPDebugLoc == nullptr) { 1734 SmallString<128> Buffer2; 1735 llvm::raw_svector_ostream OS2(Buffer2); 1736 // Build debug location 1737 PresumedLoc PLoc = 
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1738 OS2 << ";" << PLoc.getFilename() << ";"; 1739 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1740 OS2 << FD->getQualifiedNameAsString(); 1741 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1742 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1743 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1744 } 1745 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1746 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1747 1748 // Our callers always pass this to a runtime function, so for 1749 // convenience, go ahead and return a naked pointer. 1750 return LocValue.getPointer(); 1751 } 1752 1753 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1754 SourceLocation Loc) { 1755 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1756 1757 llvm::Value *ThreadID = nullptr; 1758 // Check whether we've already cached a load of the thread id in this 1759 // function. 1760 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1761 if (I != OpenMPLocThreadIDMap.end()) { 1762 ThreadID = I->second.ThreadID; 1763 if (ThreadID != nullptr) 1764 return ThreadID; 1765 } 1766 // If exceptions are enabled, do not use parameter to avoid possible crash. 1767 if (auto *OMPRegionInfo = 1768 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1769 if (OMPRegionInfo->getThreadIDVariable()) { 1770 // Check if this an outlined function with thread id passed as argument. 
1771 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1772 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1773 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1774 !CGF.getLangOpts().CXXExceptions || 1775 CGF.Builder.GetInsertBlock() == TopBlock || 1776 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1777 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1778 TopBlock || 1779 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1780 CGF.Builder.GetInsertBlock()) { 1781 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1782 // If value loaded in entry block, cache it and use it everywhere in 1783 // function. 1784 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1785 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1786 Elem.second.ThreadID = ThreadID; 1787 } 1788 return ThreadID; 1789 } 1790 } 1791 } 1792 1793 // This is not an outlined function region - need to call __kmpc_int32 1794 // kmpc_global_thread_num(ident_t *loc). 1795 // Generate thread id value and cache this value for use across the 1796 // function. 
1797 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1798 if (!Elem.second.ServiceInsertPt) 1799 setLocThreadIdInsertPt(CGF); 1800 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1801 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1802 llvm::CallInst *Call = CGF.Builder.CreateCall( 1803 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1804 emitUpdateLocation(CGF, Loc)); 1805 Call->setCallingConv(CGF.getRuntimeCC()); 1806 Elem.second.ThreadID = Call; 1807 return Call; 1808 } 1809 1810 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1811 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1812 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1813 clearLocThreadIdInsertPt(CGF); 1814 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1815 } 1816 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1817 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1818 UDRMap.erase(D); 1819 FunctionUDRMap.erase(CGF.CurFn); 1820 } 1821 auto I = FunctionUDMMap.find(CGF.CurFn); 1822 if (I != FunctionUDMMap.end()) { 1823 for(const auto *D : I->second) 1824 UDMMap.erase(D); 1825 FunctionUDMMap.erase(I); 1826 } 1827 LastprivateConditionalToTypes.erase(CGF.CurFn); 1828 } 1829 1830 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1831 return IdentTy->getPointerTo(); 1832 } 1833 1834 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1835 if (!Kmpc_MicroTy) { 1836 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1837 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1838 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1839 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1840 } 1841 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1842 } 1843 1844 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1845 llvm::FunctionCallee RTLFn = nullptr; 1846 switch (static_cast<OpenMPRTLFunction>(Function)) { 1847 case OMPRTL__kmpc_fork_call: { 1848 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1849 // microtask, ...); 1850 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1851 getKmpc_MicroPointerTy()}; 1852 auto *FnTy = 1853 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1854 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1855 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1856 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1857 llvm::LLVMContext &Ctx = F->getContext(); 1858 llvm::MDBuilder MDB(Ctx); 1859 // Annotate the callback behavior of the __kmpc_fork_call: 1860 // - The callback callee is argument number 2 (microtask). 1861 // - The first two arguments of the callback callee are unknown (-1). 1862 // - All variadic arguments to the __kmpc_fork_call are passed to the 1863 // callback callee. 
1864 F->addMetadata( 1865 llvm::LLVMContext::MD_callback, 1866 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1867 2, {-1, -1}, 1868 /* VarArgsArePassed */ true)})); 1869 } 1870 } 1871 break; 1872 } 1873 case OMPRTL__kmpc_global_thread_num: { 1874 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1875 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1876 auto *FnTy = 1877 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1878 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1879 break; 1880 } 1881 case OMPRTL__kmpc_threadprivate_cached: { 1882 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1883 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1884 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1885 CGM.VoidPtrTy, CGM.SizeTy, 1886 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1887 auto *FnTy = 1888 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1889 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1890 break; 1891 } 1892 case OMPRTL__kmpc_critical: { 1893 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1894 // kmp_critical_name *crit); 1895 llvm::Type *TypeParams[] = { 1896 getIdentTyPointerTy(), CGM.Int32Ty, 1897 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1898 auto *FnTy = 1899 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1900 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1901 break; 1902 } 1903 case OMPRTL__kmpc_critical_with_hint: { 1904 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1905 // kmp_critical_name *crit, uintptr_t hint); 1906 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1907 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1908 CGM.IntPtrTy}; 1909 auto *FnTy = 1910 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1911 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1912 break; 1913 } 1914 case OMPRTL__kmpc_threadprivate_register: { 1915 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1916 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1917 // typedef void *(*kmpc_ctor)(void *); 1918 auto *KmpcCtorTy = 1919 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1920 /*isVarArg*/ false)->getPointerTo(); 1921 // typedef void *(*kmpc_cctor)(void *, void *); 1922 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1923 auto *KmpcCopyCtorTy = 1924 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1925 /*isVarArg*/ false) 1926 ->getPointerTo(); 1927 // typedef void (*kmpc_dtor)(void *); 1928 auto *KmpcDtorTy = 1929 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1930 ->getPointerTo(); 1931 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1932 KmpcCopyCtorTy, KmpcDtorTy}; 1933 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1934 /*isVarArg*/ false); 1935 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1936 break; 1937 } 1938 case OMPRTL__kmpc_end_critical: { 1939 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1940 // kmp_critical_name *crit); 1941 llvm::Type *TypeParams[] = { 1942 getIdentTyPointerTy(), CGM.Int32Ty, 1943 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1944 auto *FnTy = 1945 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1946 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1947 break; 1948 } 1949 case OMPRTL__kmpc_cancel_barrier: { 1950 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1951 // global_tid); 1952 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1953 auto *FnTy = 1954 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1955 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1956 break; 1957 } 1958 case 
OMPRTL__kmpc_barrier: { 1959 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1960 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1961 auto *FnTy = 1962 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1963 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1964 break; 1965 } 1966 case OMPRTL__kmpc_for_static_fini: { 1967 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1968 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1969 auto *FnTy = 1970 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1971 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1972 break; 1973 } 1974 case OMPRTL__kmpc_push_num_threads: { 1975 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1976 // kmp_int32 num_threads) 1977 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1978 CGM.Int32Ty}; 1979 auto *FnTy = 1980 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1981 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1982 break; 1983 } 1984 case OMPRTL__kmpc_serialized_parallel: { 1985 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1986 // global_tid); 1987 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1988 auto *FnTy = 1989 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1990 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1991 break; 1992 } 1993 case OMPRTL__kmpc_end_serialized_parallel: { 1994 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1995 // global_tid); 1996 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1997 auto *FnTy = 1998 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1999 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 2000 break; 2001 } 2002 case OMPRTL__kmpc_flush: { 2003 // Build void 
__kmpc_flush(ident_t *loc); 2004 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 2005 auto *FnTy = 2006 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2007 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 2008 break; 2009 } 2010 case OMPRTL__kmpc_master: { 2011 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 2012 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2013 auto *FnTy = 2014 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2015 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 2016 break; 2017 } 2018 case OMPRTL__kmpc_end_master: { 2019 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 2020 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2021 auto *FnTy = 2022 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2023 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 2024 break; 2025 } 2026 case OMPRTL__kmpc_omp_taskyield: { 2027 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 2028 // int end_part); 2029 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2030 auto *FnTy = 2031 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2032 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 2033 break; 2034 } 2035 case OMPRTL__kmpc_single: { 2036 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 2037 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2038 auto *FnTy = 2039 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2040 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 2041 break; 2042 } 2043 case OMPRTL__kmpc_end_single: { 2044 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 2045 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2046 auto *FnTy = 2047 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2048 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 2049 break; 2050 } 2051 case OMPRTL__kmpc_omp_task_alloc: { 2052 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2053 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2054 // kmp_routine_entry_t *task_entry); 2055 assert(KmpRoutineEntryPtrTy != nullptr && 2056 "Type kmp_routine_entry_t must be created."); 2057 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2058 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2059 // Return void * and then cast to particular kmp_task_t type. 2060 auto *FnTy = 2061 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2062 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2063 break; 2064 } 2065 case OMPRTL__kmpc_omp_target_task_alloc: { 2066 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2067 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2068 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2069 assert(KmpRoutineEntryPtrTy != nullptr && 2070 "Type kmp_routine_entry_t must be created."); 2071 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2072 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2073 CGM.Int64Ty}; 2074 // Return void * and then cast to particular kmp_task_t type. 
2075 auto *FnTy = 2076 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2077 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2078 break; 2079 } 2080 case OMPRTL__kmpc_omp_task: { 2081 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2082 // *new_task); 2083 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2084 CGM.VoidPtrTy}; 2085 auto *FnTy = 2086 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2087 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2088 break; 2089 } 2090 case OMPRTL__kmpc_copyprivate: { 2091 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2092 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2093 // kmp_int32 didit); 2094 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2095 auto *CpyFnTy = 2096 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2097 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2098 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2099 CGM.Int32Ty}; 2100 auto *FnTy = 2101 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2102 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2103 break; 2104 } 2105 case OMPRTL__kmpc_reduce: { 2106 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2107 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2108 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2109 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2110 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2111 /*isVarArg=*/false); 2112 llvm::Type *TypeParams[] = { 2113 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2114 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2115 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2116 auto *FnTy = 2117 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2118 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2119 break; 2120 } 2121 case OMPRTL__kmpc_reduce_nowait: { 2122 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2123 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2124 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2125 // *lck); 2126 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2127 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2128 /*isVarArg=*/false); 2129 llvm::Type *TypeParams[] = { 2130 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2131 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2132 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2133 auto *FnTy = 2134 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2135 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2136 break; 2137 } 2138 case OMPRTL__kmpc_end_reduce: { 2139 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2140 // kmp_critical_name *lck); 2141 llvm::Type *TypeParams[] = { 2142 getIdentTyPointerTy(), CGM.Int32Ty, 2143 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2144 auto *FnTy = 2145 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2146 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2147 break; 2148 } 2149 case OMPRTL__kmpc_end_reduce_nowait: { 2150 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2151 // kmp_critical_name *lck); 2152 llvm::Type *TypeParams[] = { 2153 getIdentTyPointerTy(), CGM.Int32Ty, 2154 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2155 auto *FnTy = 2156 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2157 RTLFn = 2158 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2159 break; 2160 } 2161 case OMPRTL__kmpc_omp_task_begin_if0: { 
2162 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2163 // *new_task); 2164 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2165 CGM.VoidPtrTy}; 2166 auto *FnTy = 2167 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2168 RTLFn = 2169 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2170 break; 2171 } 2172 case OMPRTL__kmpc_omp_task_complete_if0: { 2173 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2174 // *new_task); 2175 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2176 CGM.VoidPtrTy}; 2177 auto *FnTy = 2178 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2179 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2180 /*Name=*/"__kmpc_omp_task_complete_if0"); 2181 break; 2182 } 2183 case OMPRTL__kmpc_ordered: { 2184 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2185 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2186 auto *FnTy = 2187 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2188 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2189 break; 2190 } 2191 case OMPRTL__kmpc_end_ordered: { 2192 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2193 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2194 auto *FnTy = 2195 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2196 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2197 break; 2198 } 2199 case OMPRTL__kmpc_omp_taskwait: { 2200 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2201 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2202 auto *FnTy = 2203 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2204 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2205 break; 2206 } 2207 case OMPRTL__kmpc_taskgroup: { 2208 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2209 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2210 auto *FnTy = 2211 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2212 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2213 break; 2214 } 2215 case OMPRTL__kmpc_end_taskgroup: { 2216 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2217 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2218 auto *FnTy = 2219 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2220 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2221 break; 2222 } 2223 case OMPRTL__kmpc_push_proc_bind: { 2224 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2225 // int proc_bind) 2226 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2227 auto *FnTy = 2228 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2229 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2230 break; 2231 } 2232 case OMPRTL__kmpc_omp_task_with_deps: { 2233 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2234 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2235 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2236 llvm::Type *TypeParams[] = { 2237 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2238 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2239 auto *FnTy = 2240 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2241 RTLFn = 2242 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2243 break; 2244 } 2245 case OMPRTL__kmpc_omp_wait_deps: { 2246 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2247 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2248 // kmp_depend_info_t *noalias_dep_list); 2249 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2250 CGM.Int32Ty, CGM.VoidPtrTy, 2251 
CGM.Int32Ty, CGM.VoidPtrTy}; 2252 auto *FnTy = 2253 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2254 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2255 break; 2256 } 2257 case OMPRTL__kmpc_cancellationpoint: { 2258 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2259 // global_tid, kmp_int32 cncl_kind) 2260 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2261 auto *FnTy = 2262 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2263 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2264 break; 2265 } 2266 case OMPRTL__kmpc_cancel: { 2267 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2268 // kmp_int32 cncl_kind) 2269 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2270 auto *FnTy = 2271 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2272 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2273 break; 2274 } 2275 case OMPRTL__kmpc_push_num_teams: { 2276 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2277 // kmp_int32 num_teams, kmp_int32 num_threads) 2278 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2279 CGM.Int32Ty}; 2280 auto *FnTy = 2281 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2282 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2283 break; 2284 } 2285 case OMPRTL__kmpc_fork_teams: { 2286 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2287 // microtask, ...); 2288 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2289 getKmpc_MicroPointerTy()}; 2290 auto *FnTy = 2291 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2292 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2293 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2294 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_teams:
        // - The callback callee is argument number 2 (microtask).
        // - The first two arguments of the callback callee are unknown (-1).
        // - All variadic arguments to the __kmpc_fork_teams are passed to the
        // callback callee.
        F->addMetadata(
            llvm::LLVMContext::MD_callback,
            *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                         2, {-1, -1},
                                         /* VarArgsArePassed */ true)}));
      }
    }
    break;
  }
  case OMPRTL__kmpc_taskloop: {
    // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
    // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
    // sched, kmp_uint64 grainsize, void *task_dup);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.IntTy,
                                CGM.VoidPtrTy,
                                CGM.IntTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty,
                                CGM.IntTy,
                                CGM.IntTy,
                                CGM.Int64Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
    break;
  }
  // The doacross entries below all share the (ident_t *loc, kmp_int32 gtid)
  // prefix used by most libomp entry points.
  case OMPRTL__kmpc_doacross_init: {
    // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
    // num_dims, struct kmp_dim *dims);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                                CGM.Int32Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
    break;
  }
  case OMPRTL__kmpc_doacross_fini: {
    // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
    break;
  }
  case OMPRTL__kmpc_doacross_post: {
    // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
    break;
  }
  case OMPRTL__kmpc_doacross_wait: {
    // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
    // *vec);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
    break;
  }
  case OMPRTL__kmpc_task_reduction_init: {
    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
    // *data);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn =
        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
    break;
  }
  case OMPRTL__kmpc_task_reduction_get_th_data: {
    // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
    // *d);
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(
        FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
    break;
  }
  case OMPRTL__kmpc_alloc: {
    // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
    break;
  }
  case OMPRTL__kmpc_free: {
    // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
    // al); omp_allocator_handle_t type is void *.
    llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
    break;
  }
  case OMPRTL__kmpc_push_target_tripcount: {
    // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
    // size);
    llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
    llvm::FunctionType *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
    break;
  }
  case OMPRTL__tgt_target: {
    // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
    break;
  }
  case OMPRTL__tgt_target_nowait: {
    // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
    // int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
    break;
  }
  case OMPRTL__tgt_target_teams: {
    // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
    // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
    // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
    break;
  }
  case OMPRTL__tgt_target_teams_nowait: {
    // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
    // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
    // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.VoidPtrTy,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int32Ty,
                                CGM.Int32Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
    break;
  }
  case OMPRTL__tgt_register_requires: {
    // Build void __tgt_register_requires(int64_t flags);
    llvm::Type *TypeParams[] = {CGM.Int64Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
    break;
  }
  case OMPRTL__tgt_target_data_begin: {
    // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
    break;
  }
  case OMPRTL__tgt_target_data_begin_nowait: {
    // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_end: {
    // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
    break;
  }
  case OMPRTL__tgt_target_data_end_nowait: {
    // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
    break;
  }
  case OMPRTL__tgt_target_data_update: {
    // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
    // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
    break;
  }
  case OMPRTL__tgt_target_data_update_nowait: {
    // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
    // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
    // *arg_types);
    llvm::Type *TypeParams[] = {CGM.Int64Ty,
                                CGM.Int32Ty,
                                CGM.VoidPtrPtrTy,
                                CGM.VoidPtrPtrTy,
                                CGM.Int64Ty->getPointerTo(),
                                CGM.Int64Ty->getPointerTo()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
    break;
  }
  case OMPRTL__tgt_mapper_num_components: {
    // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
    llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
    auto *FnTy =
        llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
    break;
  }
  case OMPRTL__tgt_push_mapper_component: {
    // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
    // *base, void *begin,
    // int64_t size, int64_t type);
    llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
                                CGM.Int64Ty, CGM.Int64Ty};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
    break;
  }
  }
  assert(RTLFn && "Unable to find OpenMP runtime function");
  return RTLFn;
}

/// Returns the __kmpc_for_static_init_{4,4u,8,8u} entry matching the loop
/// iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_init_{4,4u,8,8u} entry matching the loop
/// iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_fini_{4,4u,8,8u} entry matching the loop
/// iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_next_{4,4u,8,8u} entry matching the loop
/// iteration variable's size and signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  // The filesystem device/file IDs stand in for a unique file identity; the
  // presumed line distinguishes entries within the file.
  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Returns the address of the per-TU reference pointer emitted for a
/// 'declare target link' (or unified-shared-memory 'to') variable, creating
/// it on first use.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables need a file-unique suffix so two TUs
        // cannot collide on the same reference-pointer name.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address; on
      // the device it is filled in by the runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Returns the "<mangled-name>.cache." global used by
/// __kmpc_threadprivate_cached for the given threadprivate variable.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Returns the address of the calling thread's copy of a threadprivate
/// variable, via __kmpc_threadprivate_cached (or the variable itself when
/// native TLS is in use).
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// Registers ctor/copy-ctor/dtor for a threadprivate variable with the
/// runtime via __kmpc_threadprivate_register.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// Emits (at most once per variable) the helper functions that construct and
/// destroy the threadprivate copy of VD, and registers them with the runtime.
/// Returns the synthesized init function when no CodeGenFunction is supplied,
/// otherwise emits the registration inline and returns nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS handles construction/destruction itself.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The ctor returns the destination pointer it was handed.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a standalone init function that does
      // the runtime registration.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emits device-side ctor/dtor entries for a 'declare target' variable and
/// registers them in the offload entries table. Returns true when compiling
/// for the device (i.e. the caller should not emit the regular init).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the ctor/dtor entries only once per variable.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: emit only a named placeholder so the entry tables on host
      // and device stay in sync.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side placeholder, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Returns the address of a compiler-generated ("artificial") threadprivate
/// variable identified by Name, creating the backing global and runtime cache
/// on first use.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // Native TLS: mark the global thread-local and use it directly.
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emits an if/else over Cond, running ThenGen/ElseGen in the respective arm;
/// constant-folds the condition when possible so only one arm is emitted.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emits a 'parallel' region: __kmpc_fork_call with the outlined microtask
/// when the (optional) if-clause is true, or a serialized call sequence
/// (__kmpc_serialized_parallel / direct call / __kmpc_end_serialized_parallel)
/// when it is false.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Inside an OpenMP region that carries a thread-ID variable, reuse it.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Otherwise materialize the global thread id into a fresh i32 temporary.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Return the module-internal global variable with the given type and name,
/// creating it on first use with common linkage and a zero initializer.
/// Asserts that a previously created variable has the requested type.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Return (creating on first use) the "gomp_critical_user_<name>" lock
/// variable used by the __kmpc_critical runtime entry points.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs: calls
/// EnterCallee(EnterArgs) before the region and ExitCallee(ExitArgs) after.
/// When Conditional is true, the region body is guarded on the enter call's
/// result being non-zero; the caller must then invoke Done() to close the
/// guard's continuation block.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region guarded by the named lock; when \p Hint is
/// provided, uses __kmpc_critical_with_hint with the extra hint argument.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: the body runs only when __kmpc_master returns
/// non-zero; the guard is closed explicitly via Action.Done().
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// Emit a 'taskyield' call, preferring the OpenMPIRBuilder when available,
/// otherwise __kmpc_omp_taskyield; also emits the untied-task switch when
/// inside an OpenMP region.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region bracketed by __kmpc_taskgroup and
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-align and cast the raw pointer to the variable's memory type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Build the internal "copy_func" used by __kmpc_copyprivate: a function
/// taking two void* argument arrays (destination and source variable
/// pointers) that performs each copyprivate assignment in turn.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region. When copyprivate variables are present, a did_it
/// flag records whether this thread executed the region, and a
/// __kmpc_copyprivate call broadcasts the values to the other threads.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// Emit an 'ordered' region. Only the 'threads' form needs the
/// __kmpc_ordered / __kmpc_end_ordered bracketing; otherwise the body is
/// emitted inline.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the OMP_IDENT_BARRIER_* flag describing which
/// kind of (implicit or explicit) barrier is being emitted.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Choose the default schedule for a loop directive; doacross loops (an
/// 'ordered' clause with loop count) force schedule(static, 1).
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier: via the OpenMPIRBuilder when available, otherwise
/// __kmpc_cancel_barrier (inside a cancellable region, with an optional
/// cancellation-check branch) or a plain __kmpc_barrier.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// Return true if the schedule maps to non-chunked static (OMP_sch_static).
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// Return true if the dist_schedule maps to non-chunked static.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// Return true if the schedule maps to chunked static.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// Return true if the dist_schedule maps to chunked static.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// Return true if the (non-chunked, non-ordered) schedule is anything other
/// than plain static, i.e. requires the dynamic dispatch runtime path.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Fold the monotonic/nonmonotonic/simd schedule modifiers \p M1 and \p M2
/// into the runtime schedule value: the simd modifier may rewrite the
/// schedule itself, and from OpenMP 5.0 a nonmonotonic default is applied to
/// non-static schedules when no modifier is given. Returns Schedule ORed
/// with the modifier bits.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

/// Emit the __kmpc_dispatch_init call that starts a dynamically scheduled
/// worksharing loop with the given bounds and chunk.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper emitting the __kmpc_for_static_init call for both loop and
/// distribute directives; validates the schedule/chunk combination.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init call for a worksharing (loop/sections) directive.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init call for a 'distribute' directive (no schedule
/// modifiers apply).
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit __kmpc_for_static_fini to close a statically scheduled region,
/// tagging the location with the matching OMP_IDENT_WORK_* flag.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// Emit the per-iteration "end ordered" call for a dynamically scheduled
/// ordered loop.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit __kmpc_dispatch_next and convert its i32 result to a bool
/// indicating whether another chunk of iterations is available.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit __kmpc_push_num_threads for a 'num_threads' clause.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// Emit __kmpc_push_proc_bind for a 'proc_bind' clause.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// Emit a 'flush' construct, via the OpenMPIRBuilder when available,
/// otherwise as a __kmpc_flush call.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// True when no target-region or device-global-variable entries have been
/// recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register address/ID/flags for a target region entry. On the device the
/// entry must already have been initialized (diagnoses if not); on the host
/// a fresh entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true if an initialized-but-unregistered entry exists for the
/// given (device, file, parent, line) key.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
4010 for (const auto &D : OffloadEntriesTargetRegion) 4011 for (const auto &F : D.second) 4012 for (const auto &P : F.second) 4013 for (const auto &L : P.second) 4014 Action(D.first, F.first, P.first(), L.first, L.second); 4015 } 4016 4017 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4018 initializeDeviceGlobalVarEntryInfo(StringRef Name, 4019 OMPTargetGlobalVarEntryKind Flags, 4020 unsigned Order) { 4021 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 4022 "only required for the device " 4023 "code generation."); 4024 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 4025 ++OffloadingEntriesNum; 4026 } 4027 4028 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4029 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 4030 CharUnits VarSize, 4031 OMPTargetGlobalVarEntryKind Flags, 4032 llvm::GlobalValue::LinkageTypes Linkage) { 4033 if (CGM.getLangOpts().OpenMPIsDevice) { 4034 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 4035 assert(Entry.isValid() && Entry.getFlags() == Flags && 4036 "Entry not initialized!"); 4037 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 4038 "Resetting with the new address."); 4039 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 4040 if (Entry.getVarSize().isZero()) { 4041 Entry.setVarSize(VarSize); 4042 Entry.setLinkage(Linkage); 4043 } 4044 return; 4045 } 4046 Entry.setVarSize(VarSize); 4047 Entry.setLinkage(Linkage); 4048 Entry.setAddress(Addr); 4049 } else { 4050 if (hasDeviceGlobalVarEntryInfo(VarName)) { 4051 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 4052 assert(Entry.isValid() && Entry.getFlags() == Flags && 4053 "Entry not initialized!"); 4054 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 4055 "Resetting with the new address."); 4056 if (Entry.getVarSize().isZero()) { 4057 Entry.setVarSize(VarSize); 4058 Entry.setLinkage(Linkage); 4059 } 4060 return; 4061 } 4062 
OffloadEntriesDeviceGlobalVar.try_emplace( 4063 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 4064 ++OffloadingEntriesNum; 4065 } 4066 } 4067 4068 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4069 actOnDeviceGlobalVarEntriesInfo( 4070 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 4071 // Scan all target region entries and perform the provided action. 4072 for (const auto &E : OffloadEntriesDeviceGlobalVar) 4073 Action(E.getKey(), E.getValue()); 4074 } 4075 4076 void CGOpenMPRuntime::createOffloadEntry( 4077 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4078 llvm::GlobalValue::LinkageTypes Linkage) { 4079 StringRef Name = Addr->getName(); 4080 llvm::Module &M = CGM.getModule(); 4081 llvm::LLVMContext &C = M.getContext(); 4082 4083 // Create constant string with the name. 4084 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4085 4086 std::string StringName = getName({"omp_offloading", "entry_name"}); 4087 auto *Str = new llvm::GlobalVariable( 4088 M, StrPtrInit->getType(), /*isConstant=*/true, 4089 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4090 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4091 4092 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4093 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4094 llvm::ConstantInt::get(CGM.SizeTy, Size), 4095 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4096 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4097 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4098 llvm::GlobalVariable *Entry = createGlobalStruct( 4099 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4100 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4101 4102 // The entry has to be created in the section the linker expects it to be. 
4103 Entry->setSection("omp_offloading_entries"); 4104 } 4105 4106 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4107 // Emit the offloading entries and metadata so that the device codegen side 4108 // can easily figure out what to emit. The produced metadata looks like 4109 // this: 4110 // 4111 // !omp_offload.info = !{!1, ...} 4112 // 4113 // Right now we only generate metadata for function that contain target 4114 // regions. 4115 4116 // If we are in simd mode or there are no entries, we don't need to do 4117 // anything. 4118 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 4119 return; 4120 4121 llvm::Module &M = CGM.getModule(); 4122 llvm::LLVMContext &C = M.getContext(); 4123 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 4124 SourceLocation, StringRef>, 4125 16> 4126 OrderedEntries(OffloadEntriesInfoManager.size()); 4127 llvm::SmallVector<StringRef, 16> ParentFunctions( 4128 OffloadEntriesInfoManager.size()); 4129 4130 // Auxiliary methods to create metadata values and strings. 4131 auto &&GetMDInt = [this](unsigned V) { 4132 return llvm::ConstantAsMetadata::get( 4133 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4134 }; 4135 4136 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4137 4138 // Create the offloading info metadata node. 4139 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4140 4141 // Create function that emits metadata for each target region entry; 4142 auto &&TargetRegionMetadataEmitter = 4143 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 4144 &GetMDString]( 4145 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4146 unsigned Line, 4147 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4148 // Generate metadata for target regions. Each entry of this metadata 4149 // contains: 4150 // - Entry 0 -> Kind of this type of metadata (0). 
4151 // - Entry 1 -> Device ID of the file where the entry was identified. 4152 // - Entry 2 -> File ID of the file where the entry was identified. 4153 // - Entry 3 -> Mangled name of the function where the entry was 4154 // identified. 4155 // - Entry 4 -> Line in the file where the entry was identified. 4156 // - Entry 5 -> Order the entry was created. 4157 // The first element of the metadata node is the kind. 4158 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4159 GetMDInt(FileID), GetMDString(ParentName), 4160 GetMDInt(Line), GetMDInt(E.getOrder())}; 4161 4162 SourceLocation Loc; 4163 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 4164 E = CGM.getContext().getSourceManager().fileinfo_end(); 4165 I != E; ++I) { 4166 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 4167 I->getFirst()->getUniqueID().getFile() == FileID) { 4168 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 4169 I->getFirst(), Line, 1); 4170 break; 4171 } 4172 } 4173 // Save this entry in the right position of the ordered entries array. 4174 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 4175 ParentFunctions[E.getOrder()] = ParentName; 4176 4177 // Add metadata to the named metadata node. 4178 MD->addOperand(llvm::MDNode::get(C, Ops)); 4179 }; 4180 4181 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4182 TargetRegionMetadataEmitter); 4183 4184 // Create function that emits metadata for each device global variable entry; 4185 auto &&DeviceGlobalVarMetadataEmitter = 4186 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4187 MD](StringRef MangledName, 4188 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4189 &E) { 4190 // Generate metadata for global variables. Each entry of this metadata 4191 // contains: 4192 // - Entry 0 -> Kind of this type of metadata (1). 4193 // - Entry 1 -> Mangled name of the variable. 4194 // - Entry 2 -> Declare target kind. 
4195 // - Entry 3 -> Order the entry was created. 4196 // The first element of the metadata node is the kind. 4197 llvm::Metadata *Ops[] = { 4198 GetMDInt(E.getKind()), GetMDString(MangledName), 4199 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4200 4201 // Save this entry in the right position of the ordered entries array. 4202 OrderedEntries[E.getOrder()] = 4203 std::make_tuple(&E, SourceLocation(), MangledName); 4204 4205 // Add metadata to the named metadata node. 4206 MD->addOperand(llvm::MDNode::get(C, Ops)); 4207 }; 4208 4209 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4210 DeviceGlobalVarMetadataEmitter); 4211 4212 for (const auto &E : OrderedEntries) { 4213 assert(std::get<0>(E) && "All ordered entries must exist!"); 4214 if (const auto *CE = 4215 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4216 std::get<0>(E))) { 4217 if (!CE->getID() || !CE->getAddress()) { 4218 // Do not blame the entry if the parent funtion is not emitted. 4219 StringRef FnName = ParentFunctions[CE->getOrder()]; 4220 if (!CGM.GetGlobalValue(FnName)) 4221 continue; 4222 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4223 DiagnosticsEngine::Error, 4224 "Offloading entry for target region in %0 is incorrect: either the " 4225 "address or the ID is invalid."); 4226 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 4227 continue; 4228 } 4229 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4230 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4231 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 4232 OffloadEntryInfoDeviceGlobalVar>( 4233 std::get<0>(E))) { 4234 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4235 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4236 CE->getFlags()); 4237 switch (Flags) { 4238 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4239 if (CGM.getLangOpts().OpenMPIsDevice && 4240 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4241 continue; 4242 if (!CE->getAddress()) { 4243 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4244 DiagnosticsEngine::Error, "Offloading entry for declare target " 4245 "variable %0 is incorrect: the " 4246 "address is invalid."); 4247 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 4248 continue; 4249 } 4250 // The vaiable has no definition - no need to add the entry. 4251 if (CE->getVarSize().isZero()) 4252 continue; 4253 break; 4254 } 4255 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4256 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4257 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4258 "Declaret target link address is set."); 4259 if (CGM.getLangOpts().OpenMPIsDevice) 4260 continue; 4261 if (!CE->getAddress()) { 4262 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4263 DiagnosticsEngine::Error, 4264 "Offloading entry for declare target variable is incorrect: the " 4265 "address is invalid."); 4266 CGM.getDiags().Report(DiagID); 4267 continue; 4268 } 4269 break; 4270 } 4271 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4272 CE->getVarSize().getQuantity(), Flags, 4273 CE->getLinkage()); 4274 } else { 4275 llvm_unreachable("Unsupported entry kind."); 4276 } 4277 } 4278 } 4279 4280 /// Loads all the offload entries information from the host IR 4281 /// metadata. 4282 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4283 // If we are in target mode, load the metadata from the host IR. This code has 4284 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4285 4286 if (!CGM.getLangOpts().OpenMPIsDevice) 4287 return; 4288 4289 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4290 return; 4291 4292 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4293 if (auto EC = Buf.getError()) { 4294 CGM.getDiags().Report(diag::err_cannot_open_file) 4295 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4296 return; 4297 } 4298 4299 llvm::LLVMContext C; 4300 auto ME = expectedToErrorOrAndEmitErrors( 4301 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4302 4303 if (auto EC = ME.getError()) { 4304 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4305 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4306 CGM.getDiags().Report(DiagID) 4307 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4308 return; 4309 } 4310 4311 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4312 if (!MD) 4313 return; 4314 4315 for (llvm::MDNode *MN : MD->operands()) { 4316 auto &&GetMDInt = [MN](unsigned Idx) { 4317 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4318 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4319 }; 4320 4321 auto &&GetMDString = [MN](unsigned Idx) { 4322 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4323 return V->getString(); 4324 }; 4325 4326 switch (GetMDInt(0)) { 4327 default: 4328 llvm_unreachable("Unexpected metadata!"); 4329 break; 4330 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4331 OffloadingEntryInfoTargetRegion: 4332 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4333 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4334 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4335 /*Order=*/GetMDInt(5)); 4336 break; 4337 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4338 OffloadingEntryInfoDeviceGlobalVar: 4339 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4340 /*MangledName=*/GetMDString(1), 4341 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4342 /*Flags=*/GetMDInt(2)), 4343 /*Order=*/GetMDInt(3)); 4344 break; 4345 } 4346 } 4347 } 4348 4349 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4350 if (!KmpRoutineEntryPtrTy) { 4351 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4352 ASTContext &C = CGM.getContext(); 4353 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4354 FunctionProtoType::ExtProtoInfo EPI; 4355 KmpRoutineEntryPtrQTy = C.getPointerType( 4356 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4357 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4358 } 4359 } 4360 4361 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4362 // Make sure the type of the entry is already created. This is the type we 4363 // have to create: 4364 // struct __tgt_offload_entry{ 4365 // void *addr; // Pointer to the offload entry info. 4366 // // (function or global) 4367 // char *name; // Name of the function or global. 4368 // size_t size; // Size of the entry info (0 if it a function). 4369 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4370 // int32_t reserved; // Reserved, to use by the runtime library. 
4371 // }; 4372 if (TgtOffloadEntryQTy.isNull()) { 4373 ASTContext &C = CGM.getContext(); 4374 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4375 RD->startDefinition(); 4376 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4377 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4378 addFieldToRecordDecl(C, RD, C.getSizeType()); 4379 addFieldToRecordDecl( 4380 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4381 addFieldToRecordDecl( 4382 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4383 RD->completeDefinition(); 4384 RD->addAttr(PackedAttr::CreateImplicit(C)); 4385 TgtOffloadEntryQTy = C.getRecordType(RD); 4386 } 4387 return TgtOffloadEntryQTy; 4388 } 4389 4390 namespace { 4391 struct PrivateHelpersTy { 4392 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 4393 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 4394 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 4395 PrivateElemInit(PrivateElemInit) {} 4396 const Expr *OriginalRef = nullptr; 4397 const VarDecl *Original = nullptr; 4398 const VarDecl *PrivateCopy = nullptr; 4399 const VarDecl *PrivateElemInit = nullptr; 4400 }; 4401 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4402 } // anonymous namespace 4403 4404 static RecordDecl * 4405 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4406 if (!Privates.empty()) { 4407 ASTContext &C = CGM.getContext(); 4408 // Build struct .kmp_privates_t. 
{ 4409 // /* private vars */ 4410 // }; 4411 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4412 RD->startDefinition(); 4413 for (const auto &Pair : Privates) { 4414 const VarDecl *VD = Pair.second.Original; 4415 QualType Type = VD->getType().getNonReferenceType(); 4416 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4417 if (VD->hasAttrs()) { 4418 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4419 E(VD->getAttrs().end()); 4420 I != E; ++I) 4421 FD->addAttr(*I); 4422 } 4423 } 4424 RD->completeDefinition(); 4425 return RD; 4426 } 4427 return nullptr; 4428 } 4429 4430 static RecordDecl * 4431 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4432 QualType KmpInt32Ty, 4433 QualType KmpRoutineEntryPointerQTy) { 4434 ASTContext &C = CGM.getContext(); 4435 // Build struct kmp_task_t { 4436 // void * shareds; 4437 // kmp_routine_entry_t routine; 4438 // kmp_int32 part_id; 4439 // kmp_cmplrdata_t data1; 4440 // kmp_cmplrdata_t data2; 4441 // For taskloops additional fields: 4442 // kmp_uint64 lb; 4443 // kmp_uint64 ub; 4444 // kmp_int64 st; 4445 // kmp_int32 liter; 4446 // void * reductions; 4447 // }; 4448 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4449 UD->startDefinition(); 4450 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4451 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4452 UD->completeDefinition(); 4453 QualType KmpCmplrdataTy = C.getRecordType(UD); 4454 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4455 RD->startDefinition(); 4456 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4457 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4458 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4459 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4460 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4461 if (isOpenMPTaskLoopDirective(Kind)) { 4462 QualType KmpUInt64Ty = 4463 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4464 QualType KmpInt64Ty = 4465 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4466 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4467 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4468 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4469 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4470 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4471 } 4472 RD->completeDefinition(); 4473 return RD; 4474 } 4475 4476 static RecordDecl * 4477 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4478 ArrayRef<PrivateDataTy> Privates) { 4479 ASTContext &C = CGM.getContext(); 4480 // Build struct kmp_task_t_with_privates { 4481 // kmp_task_t task_data; 4482 // .kmp_privates_t. privates; 4483 // }; 4484 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4485 RD->startDefinition(); 4486 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4487 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4488 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4489 RD->completeDefinition(); 4490 return RD; 4491 } 4492 4493 /// Emit a proxy function which accepts kmp_task_t as the second 4494 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // The proxy has the fixed runtime signature (kmp_int32 gtid, kmp_task_t *tt);
  // both parameters are synthesized here as implicit decls.
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // Dereference the kmp_task_t_with_privates* argument; its first field is the
  // embedded kmp_task_t (see createKmpTaskTWithPrivatesRecordDecl).
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Field offsets inside kmp_task_t are given by the KmpTaskTFields enum.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address so the task function can update it.
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  // shareds is stored as void*; cast it back to the captured-record pointer
  // type expected by the outlined task function.
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the (optional) second field of
  // kmp_task_t_with_privates; pass null when no privates were emitted.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  // Taskloop entries additionally receive lb/ub/st/liter/reductions loaded
  // from the extended kmp_task_t (see createKmpTaskTRecordDecl).
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime ignores the result; always return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the task destructor thunk with the runtime signature
/// kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *tt). It walks the
/// fields of the privates record (second field of tt) and schedules a destroy
/// for each field whose type requires destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Skip the leading kmp_task_t field; FI now designates the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Only fields with a non-trivial destruction kind need cleanup.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates.
*noalias privs, <ty1> 4661 /// **noalias priv1,..., <tyn> **noalias privn) { 4662 /// *priv1 = &.privates.priv1; 4663 /// ...; 4664 /// *privn = &.privates.privn; 4665 /// } 4666 /// \endcode 4667 static llvm::Value * 4668 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4669 ArrayRef<const Expr *> PrivateVars, 4670 ArrayRef<const Expr *> FirstprivateVars, 4671 ArrayRef<const Expr *> LastprivateVars, 4672 QualType PrivatesQTy, 4673 ArrayRef<PrivateDataTy> Privates) { 4674 ASTContext &C = CGM.getContext(); 4675 FunctionArgList Args; 4676 ImplicitParamDecl TaskPrivatesArg( 4677 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4678 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4679 ImplicitParamDecl::Other); 4680 Args.push_back(&TaskPrivatesArg); 4681 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4682 unsigned Counter = 1; 4683 for (const Expr *E : PrivateVars) { 4684 Args.push_back(ImplicitParamDecl::Create( 4685 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4686 C.getPointerType(C.getPointerType(E->getType())) 4687 .withConst() 4688 .withRestrict(), 4689 ImplicitParamDecl::Other)); 4690 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4691 PrivateVarsPos[VD] = Counter; 4692 ++Counter; 4693 } 4694 for (const Expr *E : FirstprivateVars) { 4695 Args.push_back(ImplicitParamDecl::Create( 4696 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4697 C.getPointerType(C.getPointerType(E->getType())) 4698 .withConst() 4699 .withRestrict(), 4700 ImplicitParamDecl::Other)); 4701 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4702 PrivateVarsPos[VD] = Counter; 4703 ++Counter; 4704 } 4705 for (const Expr *E : LastprivateVars) { 4706 Args.push_back(ImplicitParamDecl::Create( 4707 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4708 C.getPointerType(C.getPointerType(E->getType())) 4709 .withConst() 4710 .withRestrict(), 4711 ImplicitParamDecl::Other)); 4712 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4713 
PrivateVarsPos[VD] = Counter; 4714 ++Counter; 4715 } 4716 const auto &TaskPrivatesMapFnInfo = 4717 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4718 llvm::FunctionType *TaskPrivatesMapTy = 4719 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4720 std::string Name = 4721 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4722 auto *TaskPrivatesMap = llvm::Function::Create( 4723 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4724 &CGM.getModule()); 4725 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4726 TaskPrivatesMapFnInfo); 4727 if (CGM.getLangOpts().Optimize) { 4728 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4729 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4730 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4731 } 4732 CodeGenFunction CGF(CGM); 4733 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4734 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4735 4736 // *privi = &.privates.privi; 4737 LValue Base = CGF.EmitLoadOfPointerLValue( 4738 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4739 TaskPrivatesArg.getType()->castAs<PointerType>()); 4740 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4741 Counter = 0; 4742 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4743 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4744 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4745 LValue RefLVal = 4746 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4747 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4748 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 4749 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 4750 ++Counter; 4751 } 4752 CGF.FinishFunction(); 4753 return TaskPrivatesMap; 4754 } 4755 4756 /// Emit initialization for private variables in task-based directives. 
/// Initializes the private copies stored inside the task record.
/// \param KmpTaskSharedsPtr Address of the shareds area of the (source) task;
///        may be invalid when there is nothing to copy from.
/// \param TDBase LValue of the kmp_task_t-with-privates record to fill.
/// \param ForDup true when called from the task-duplication function
///        (taskloop path); changes where shared originals are read from and
///        which initializers are re-run.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Reuse FI to walk the fields of the privates record in parallel with
  // Privates (both were built in the same order).
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the duplication function only non-trivial constructor inits must be
    // re-run; everything else was already copied by the runtime's memcpy.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks a firstprivate: initialize from the
      // shared original rather than default-initializing.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the original from the source task's shareds area, using the
          // declaration's natural alignment.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else {
          // Emit the reference in an inlined OpenMP region so captured
          // variables resolve correctly.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize the element placeholder to
          // alias the shared original, then run the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // private/lastprivate copy: just run its default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
/// Returns true if at least one private copy has a non-trivial constructor
/// initializer, i.e. the taskloop duplication function must re-run inits.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Trivial initializers are handled by the runtime's bitwise task copy.
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arguments: destination task, source task, lastprivate-iteration flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are re-initialized from the *source* task's shareds.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Emits the task record (kmp_task_t plus privates), the proxy task entry,
/// the privates-mapping and optional duplication/destructor helpers, and the
/// __kmpc_omp_task_alloc call for the given task-based directive.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    // Firstprivates carry a non-null PrivateElemInit used later by
    // emitPrivatesInit to copy from the shared original.
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment to minimize record padding; stable so that
  // equal-alignment privates keep declaration order.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The mapping function's expected type is taken from the fourth parameter
  // of the outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // final(expr) may be a runtime value (pointer set) or a compile-time
  // constant (int part of the pointer-int pair).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops need a duplication function when lastprivates must be flagged
    // or when privates require non-trivial (re)initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// Lazily builds the implicit kmp_depend_info record type (uintptr base_addr,
/// size_t len, flags) and always (re)computes the flags integer type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Returns {number of dependences, lvalue of the first element} stored in a
/// depobj. The element count is kept in the base_addr field of the sentinel
/// element located just *before* the array (index -1).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the sentinel that stores the count.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Builds the kmp_depend_info array for a depend clause or a depobj directive.
/// \param ForDepobj true when emitting for a standalone depobj construct: the
///        array is heap-allocated via __kmpc_alloc with an extra leading
///        element that stores the dependence count.
/// \return {number of elements, address of the first dependence element}.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependencies,
    bool ForDepobj, SourceLocation Loc) {
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.size();
  llvm::Value *NumOfElements = nullptr;
  if (NumDependencies) {
    QualType FlagsTy;
    getDependTypes(C, KmpDependInfoTy, FlagsTy);
    RecordDecl *KmpDependInfoRD =
        cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    unsigned NumDepobjDependecies = 0;
    SmallVector<std::pair<llvm::Value *, LValue>, 4> Depobjs;
    llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
    // Calculate number of depobj dependecies.
    for (const std::pair<OpenMPDependClauseKind, const Expr *> &Pair :
         Dependencies) {
      if (Pair.first != OMPC_DEPEND_depobj)
        continue;
      LValue DepobjLVal = CGF.EmitLValue(Pair.second);
      llvm::Value *NumDeps;
      LValue Base;
      std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
      NumOfDepobjElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumDeps);
      Depobjs.emplace_back(NumDeps, Base);
      ++NumDepobjDependecies;
    }

    QualType KmpDependInfoArrayTy;
    // Define type kmp_depend_info[<Dependencies.size()>];
    // For depobj reserve one extra element to store the number of elements.
    // It is required to handle depobj(x) update(in) construct.
    // kmp_depend_info[<Dependencies.size()>] deps;
    if (ForDepobj) {
      assert(NumDepobjDependecies == 0 &&
             "depobj dependency kind is not expected in depobj directive.");
      KmpDependInfoArrayTy = C.getConstantArrayType(
          KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      // Need to allocate on the dynamic memory.
      llvm::Value *ThreadID = getThreadID(CGF, Loc);
      // Use default allocator.
      llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      CharUnits Align = C.getTypeAlignInChars(KmpDependInfoArrayTy);
      CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
      llvm::Value *Size = CGF.CGM.getSize(Sz.alignTo(Align));
      llvm::Value *Args[] = {ThreadID, Size, Allocator};

      llvm::Value *Addr = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
      Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Addr, CGF.ConvertTypeForMem(KmpDependInfoArrayTy)->getPointerTo());
      DependenciesArray = Address(Addr, Align);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                             /*isSigned=*/false);
    } else if (NumDepobjDependecies > 0) {
      // Total size is only known at runtime: depobj element counts plus the
      // directly-listed dependencies.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          NumOfDepobjElements,
          llvm::ConstantInt::get(CGM.IntPtrTy,
                                 NumDependencies - NumDepobjDependecies,
                                 /*isSigned=*/false));
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
      OpaqueValueExpr OVE(
          Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpDependInfoArrayTy =
          C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    } else {
      KmpDependInfoArrayTy = C.getConstantArrayType(
          KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      DependenciesArray =
          CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                             /*isSigned=*/false);
    }
    if (ForDepobj) {
      // Write number of elements in the first element of array for depobj.
      llvm::Value *NumVal =
          llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0),
          KmpDependInfoTy);
      // deps[i].base_addr = NumDependencies;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(NumVal, BaseAddrLVal);
    }
    // Element 0 of a depobj array holds the count, so payload starts at 1.
    unsigned Pos = ForDepobj ? 1 : 0;
    for (unsigned I = 0; I < NumDependencies; ++I) {
      // depobj entries are copied wholesale below, not filled element-wise.
      if (Dependencies[I].first == OMPC_DEPEND_depobj)
        continue;
      const Expr *E = Dependencies[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: size = one-past-upper-bound minus lower bound.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
            UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base;
      // A VLA (depobj-mixed case) is addressed with a plain GEP; a constant
      // array uses the const-array form.
      if (NumDepobjDependecies > 0) {
        Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateConstGEP(DependenciesArray, Pos),
            KmpDependInfoTy);
      } else {
        Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateConstArrayGEP(DependenciesArray, Pos),
            KmpDependInfoTy);
      }
      // deps[i].base_addr = &<Dependencies[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependencies[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependencies[i].first>;
      RTLDependenceKindTy DepKind =
          translateDependencyKind(Dependencies[I].first);
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
      ++Pos;
    }
    // Copy final depobj arrays.
    if (NumDepobjDependecies > 0) {
      llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
      Address Addr = CGF.Builder.CreateConstGEP(DependenciesArray, Pos);
      for (const std::pair<llvm::Value *, LValue> &Pair : Depobjs) {
        llvm::Value *Size = CGF.Builder.CreateNUWMul(ElSize, Pair.first);
        CGF.Builder.CreateMemCpy(Addr, Pair.second.getAddress(CGF), Size);
        Addr =
            Address(CGF.Builder.CreateGEP(
                        Addr.getElementType(), Addr.getPointer(), Pair.first),
                    DependenciesArray.getAlignment().alignmentOfArrayElement(
                        C.getTypeSizeInChars(KmpDependInfoTy)));
      }
      DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          DependenciesArray, CGF.VoidPtrTy);
    } else {
      // Skip the count element for depobj so callers see the payload start.
      DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, ForDepobj ? 1 : 0),
          CGF.VoidPtrTy);
    }
  }
  return std::make_pair(NumOfElements, DependenciesArray);
}

/// Emits code for the 'destroy' clause of a depobj directive: frees the
/// heap-allocated dependence array (including its leading count element).
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // The allocation actually starts one element earlier (the count sentinel),
  // so step back before freeing.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
}

/// Emits code for the 'update' clause of a depobj directive: rewrites the
/// flags field of every dependence element stored in the depobj.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
5514 Address ElementNext = 5515 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5516 ElementPHI->addIncoming(ElementNext.getPointer(), 5517 CGF.Builder.GetInsertBlock()); 5518 llvm::Value *IsEmpty = 5519 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5520 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5521 // Done. 5522 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5523 } 5524 5525 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5526 const OMPExecutableDirective &D, 5527 llvm::Function *TaskFunction, 5528 QualType SharedsTy, Address Shareds, 5529 const Expr *IfCond, 5530 const OMPTaskDataTy &Data) { 5531 if (!CGF.HaveInsertPoint()) 5532 return; 5533 5534 TaskResultTy Result = 5535 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5536 llvm::Value *NewTask = Result.NewTask; 5537 llvm::Function *TaskEntry = Result.TaskEntry; 5538 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5539 LValue TDBase = Result.TDBase; 5540 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5541 // Process list of dependences. 5542 Address DependenciesArray = Address::invalid(); 5543 llvm::Value *NumOfElements; 5544 std::tie(NumOfElements, DependenciesArray) = 5545 emitDependClause(CGF, Data.Dependences, /*ForDepobj=*/false, Loc); 5546 5547 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5548 // libcall. 
5549 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5550 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5551 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5552 // list is not empty 5553 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5554 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5555 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5556 llvm::Value *DepTaskArgs[7]; 5557 if (!Data.Dependences.empty()) { 5558 DepTaskArgs[0] = UpLoc; 5559 DepTaskArgs[1] = ThreadID; 5560 DepTaskArgs[2] = NewTask; 5561 DepTaskArgs[3] = NumOfElements; 5562 DepTaskArgs[4] = DependenciesArray.getPointer(); 5563 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5564 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5565 } 5566 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5567 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5568 if (!Data.Tied) { 5569 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5570 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5571 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5572 } 5573 if (!Data.Dependences.empty()) { 5574 CGF.EmitRuntimeCall( 5575 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5576 } else { 5577 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5578 TaskArgs); 5579 } 5580 // Check if parent region is untied and build return for untied task; 5581 if (auto *Region = 5582 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5583 Region->emitUntiedSwitch(CGF); 5584 }; 5585 5586 llvm::Value *DepWaitTaskArgs[6]; 5587 if (!Data.Dependences.empty()) { 5588 DepWaitTaskArgs[0] = UpLoc; 5589 DepWaitTaskArgs[1] = ThreadID; 5590 DepWaitTaskArgs[2] = NumOfElements; 5591 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5592 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5593 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5594 } 5595 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5596 &Data, &DepWaitTaskArgs, 5597 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5598 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5599 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5600 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5601 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5602 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5603 // is specified. 5604 if (!Data.Dependences.empty()) 5605 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5606 DepWaitTaskArgs); 5607 // Call proxy_task_entry(gtid, new_task); 5608 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5609 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5610 Action.Enter(CGF); 5611 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5612 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5613 OutlinedFnArgs); 5614 }; 5615 5616 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5617 // kmp_task_t *new_task); 5618 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5619 // kmp_task_t *new_task); 5620 RegionCodeGenTy RCG(CodeGen); 5621 CommonActionTy Action( 5622 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5623 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5624 RCG.setAction(Action); 5625 RCG(CGF); 5626 }; 5627 5628 if (IfCond) { 5629 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5630 } else { 5631 RegionCodeGenTy ThenRCG(ThenCodeGen); 5632 ThenRCG(CGF); 5633 } 5634 } 5635 5636 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5637 const OMPLoopDirective &D, 5638 llvm::Function *TaskFunction, 5639 QualType SharedsTy, Address Shareds, 5640 const Expr *IfCond, 5641 const OMPTaskDataTy &Data) { 5642 if 
(!CGF.HaveInsertPoint()) 5643 return; 5644 TaskResultTy Result = 5645 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5646 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5647 // libcall. 5648 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5649 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5650 // sched, kmp_uint64 grainsize, void *task_dup); 5651 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5652 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5653 llvm::Value *IfVal; 5654 if (IfCond) { 5655 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5656 /*isSigned=*/true); 5657 } else { 5658 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5659 } 5660 5661 LValue LBLVal = CGF.EmitLValueForField( 5662 Result.TDBase, 5663 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5664 const auto *LBVar = 5665 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5666 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5667 LBLVal.getQuals(), 5668 /*IsInitializer=*/true); 5669 LValue UBLVal = CGF.EmitLValueForField( 5670 Result.TDBase, 5671 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5672 const auto *UBVar = 5673 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5674 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5675 UBLVal.getQuals(), 5676 /*IsInitializer=*/true); 5677 LValue StLVal = CGF.EmitLValueForField( 5678 Result.TDBase, 5679 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5680 const auto *StVar = 5681 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5682 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5683 StLVal.getQuals(), 5684 /*IsInitializer=*/true); 5685 // Store reductions address. 
5686 LValue RedLVal = CGF.EmitLValueForField( 5687 Result.TDBase, 5688 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5689 if (Data.Reductions) { 5690 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5691 } else { 5692 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5693 CGF.getContext().VoidPtrTy); 5694 } 5695 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5696 llvm::Value *TaskArgs[] = { 5697 UpLoc, 5698 ThreadID, 5699 Result.NewTask, 5700 IfVal, 5701 LBLVal.getPointer(CGF), 5702 UBLVal.getPointer(CGF), 5703 CGF.EmitLoadOfScalar(StLVal, Loc), 5704 llvm::ConstantInt::getSigned( 5705 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5706 llvm::ConstantInt::getSigned( 5707 CGF.IntTy, Data.Schedule.getPointer() 5708 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5709 : NoSchedule), 5710 Data.Schedule.getPointer() 5711 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5712 /*isSigned=*/false) 5713 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5714 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5715 Result.TaskDupFn, CGF.VoidPtrTy) 5716 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5717 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 5718 } 5719 5720 /// Emit reduction operation for each element of array (required for 5721 /// array sections) LHS op = RHS. 5722 /// \param Type Type of array. 5723 /// \param LHSVar Variable on the left side of the reduction operation 5724 /// (references element of array in original variable). 5725 /// \param RHSVar Variable on the right side of the reduction operation 5726 /// (references element of array in original variable). 5727 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5728 /// RHSVar. 
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current source/destination element; their back-edge
  // incoming values are added after the increment below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current elements so
  // RedOpGen operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Detect the user-defined-reduction pattern: a call whose callee is an
  // opaque reference to an OMPDeclareReductionDecl.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          // Bind the opaque callee to the UDR combiner function and emit the
          // call expression against it.
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Plain combiner expression: emit directly.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the outlined 'void reduction_func(void *lhs[n], void *rhs[n])' that
/// the __kmpc_reduce runtime call invokes to combine per-thread reduction
/// lists: for each item, *(Type_i*)lhs[i] = RedOp_i(*lhs[i], *rhs[i]).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Remap each LHS/RHS variable to the corresponding slot of the packed
  // void*[] argument arrays.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type. VLA items occupy an extra slot in
      // the list, which holds the element count as a pointer-sized integer.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits one reduction combination LHS = RedOp(LHS, RHS), dispatching to the
/// element-wise aggregate path for array-typed items.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the full reduction sequence for a directive's 'reduction' clauses:
/// builds the RedList of per-thread item addresses, the outlined reduce_func,
/// and the __kmpc_reduce{_nowait} switch with the tree-reduction (case 1) and
/// atomic/critical (case 2) combination paths.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] =
  //   ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //   *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // Serial context: combine in place, no runtime calls needed.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. VLA items use an extra slot carrying the element
      // count, smuggled through the void* slot via inttoptr.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Recognize the 'x = <update>' shape produced by Sema for reductions.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // When a cmpxchg loop is required, re-evaluate the update
                // expression with the loaded value of x bound to VD.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 6239 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 6240 const Expr *Ref) { 6241 SmallString<256> Buffer; 6242 llvm::raw_svector_ostream Out(Buffer); 6243 const clang::DeclRefExpr *DE; 6244 const VarDecl *D = ::getBaseDecl(Ref, DE); 6245 if (!D) 6246 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 6247 D = D->getCanonicalDecl(); 6248 std::string Name = CGM.getOpenMPRuntime().getName( 6249 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 6250 Out << Prefix << Name << "_" 6251 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 6252 return std::string(Out.str()); 6253 } 6254 6255 /// Emits reduction initializer function: 6256 /// \code 6257 /// void @.red_init(void* %arg) { 6258 /// %0 = bitcast void* %arg to <type>* 6259 /// store <type> <init>, <type>* %0 6260 /// ret void 6261 /// } 6262 /// \endcode 6263 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6264 SourceLocation Loc, 6265 ReductionCodeGen &RCG, unsigned N) { 6266 ASTContext &C = CGM.getContext(); 6267 FunctionArgList Args; 6268 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6269 ImplicitParamDecl::Other); 6270 Args.emplace_back(&Param); 6271 const auto &FnInfo = 6272 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6273 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6274 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6275 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6276 Name, &CGM.getModule()); 6277 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6278 Fn->setDoesNotRecurse(); 6279 CodeGenFunction CGF(CGM); 6280 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6281 Address PrivateAddr = CGF.EmitLoadOfPointer( 6282 CGF.GetAddrOfLocalVar(&Param), 6283 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6284 llvm::Value *Size = 
nullptr; 6285 // If the size of the reduction item is non-constant, load it from global 6286 // threadprivate variable. 6287 if (RCG.getSizes(N).second) { 6288 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6289 CGF, CGM.getContext().getSizeType(), 6290 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6291 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6292 CGM.getContext().getSizeType(), Loc); 6293 } 6294 RCG.emitAggregateType(CGF, N, Size); 6295 LValue SharedLVal; 6296 // If initializer uses initializer from declare reduction construct, emit a 6297 // pointer to the address of the original reduction item (reuired by reduction 6298 // initializer) 6299 if (RCG.usesReductionInitializer(N)) { 6300 Address SharedAddr = 6301 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6302 CGF, CGM.getContext().VoidPtrTy, 6303 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6304 SharedAddr = CGF.EmitLoadOfPointer( 6305 SharedAddr, 6306 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 6307 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 6308 } else { 6309 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 6310 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 6311 CGM.getContext().VoidPtrTy); 6312 } 6313 // Emit the initializer: 6314 // %0 = bitcast void* %arg to <type>* 6315 // store <type> <init>, <type>* %0 6316 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 6317 [](CodeGenFunction &) { return false; }); 6318 CGF.FinishFunction(); 6319 return Fn; 6320 } 6321 6322 /// Emits reduction combiner function: 6323 /// \code 6324 /// void @.red_comb(void* %arg0, void* %arg1) { 6325 /// %lhs = bitcast void* %arg0 to <type>* 6326 /// %rhs = bitcast void* %arg1 to <type>* 6327 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6328 /// store <type> %2, <type>* %lhs 6329 /// ret void 6330 /// } 6331 /// \endcode 6332 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS/RHS are DeclRefExprs for the reduction placeholder variables; the
  // combiner body below is emitted in terms of these decls.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // Two opaque void* parameters: in/out item (%arg0) and in item (%arg1).
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // threadprivate global variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
///   %0 = bitcast void* %arg to <type>*
///   <destroy>(<type>* %0)
///   ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Single opaque void* parameter: the private reduction item to destroy.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy =
      CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from the
  // threadprivate global variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  // Nothing to do without an insert point or without reduction items.
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar;          // shared reduction item
  //   size_t reduce_size;         // size of data item
  //   void *reduce_init;          // data initialization routine
  //   void *reduce_fini;          // data finalization routine
  //   void *reduce_comb;          // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one kmp_task_red_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; (null if no cleanups are required).
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; (1 signals delayed creation to the runtime).
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type has a non-constant size
  // (Sizes.second != nullptr); the init/comb/fini helpers load it back.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // The returned address reuses the alignment of the shared item.
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // Prefer the OpenMPIRBuilder when it is enabled for this module.
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc),
                           getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // Temporarily installs an inlined-region CapturedStmtInfo for the duration
  // of the body emission.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kind constants expected by the libomp runtime entry points
/// __kmpc_cancel / __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps an OpenMP cancel-region directive kind to the runtime's numeric
/// cancellation kind.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The actual cancel call is emitted by this callback so it can be guarded
    // by the 'if' clause condition below.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Remember that this module emitted at least one target region.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the ID must be the (externally visible) function address.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: any unique global works as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  // Drill through nested compound statements, skipping "ignorable" statements
  // (trivial expressions, asm/null statements, standalone OpenMP directives,
  // trivial declarations), until either exactly one significant child remains
  // or more than one is found (in which case return nullptr).
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // A variable is ignorable if it is constexpr, or has a trivial
              // (or reference) type with no init or only a trivial init.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': inspect the (single) nested directive, if any, to find
    // a teams construct with a num_teams clause.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams construct without num_teams: let the runtime decide (0).
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause (if any) is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct: emit a single team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and must not reach
  // this function (see the assertion above).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case
      OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Computes the number of threads implied by a parallel region nested directly
/// inside the captured statement \p CS, taking its 'if' and 'num_threads'
/// clauses into account and clamping to \p DefaultThreadLimitVal when that is
/// non-null. Returns an i32 value, or \p DefaultThreadLimitVal when no nested
/// parallel directive is found.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an 'if' clause without a name modifier, or one naming the
        // parallel construct, applies here.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the parallel region runs serially.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the condition depends on.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any pre-init declarations the num_threads expression needs.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the default thread limit: min(limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
7087 /// 7088 /// Emit the num_threads clause for directives such as 'target parallel' that 7089 /// have no associated teams construct. 7090 /// 7091 /// Otherwise, return nullptr. 7092 static llvm::Value * 7093 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 7094 const OMPExecutableDirective &D) { 7095 assert(!CGF.getLangOpts().OpenMPIsDevice && 7096 "Clauses associated with the teams directive expected to be emitted " 7097 "only for the host!"); 7098 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7099 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7100 "Expected target-based executable directive."); 7101 CGBuilderTy &Bld = CGF.Builder; 7102 llvm::Value *ThreadLimitVal = nullptr; 7103 llvm::Value *NumThreadsVal = nullptr; 7104 switch (DirectiveKind) { 7105 case OMPD_target: { 7106 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7107 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7108 return NumThreads; 7109 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7110 CGF.getContext(), CS->getCapturedStmt()); 7111 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7112 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7113 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7114 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7115 const auto *ThreadLimitClause = 7116 Dir->getSingleClause<OMPThreadLimitClause>(); 7117 CodeGenFunction::LexicalScope Scope( 7118 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7119 if (const auto *PreInit = 7120 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7121 for (const auto *I : PreInit->decls()) { 7122 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7123 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7124 } else { 7125 CodeGenFunction::AutoVarEmission Emission = 7126 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7127 CGF.EmitAutoVarCleanups(Emission); 7128 } 7129 } 7130 } 7131 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7132 
ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7133 ThreadLimitVal = 7134 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7135 } 7136 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7137 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7138 CS = Dir->getInnermostCapturedStmt(); 7139 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7140 CGF.getContext(), CS->getCapturedStmt()); 7141 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7142 } 7143 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7144 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7145 CS = Dir->getInnermostCapturedStmt(); 7146 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7147 return NumThreads; 7148 } 7149 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7150 return Bld.getInt32(1); 7151 } 7152 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7153 } 7154 case OMPD_target_teams: { 7155 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7156 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7157 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7158 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7159 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7160 ThreadLimitVal = 7161 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7162 } 7163 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7164 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7165 return NumThreads; 7166 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7167 CGF.getContext(), CS->getCapturedStmt()); 7168 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7169 if (Dir->getDirectiveKind() == OMPD_distribute) { 7170 CS = Dir->getInnermostCapturedStmt(); 7171 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7172 return NumThreads; 7173 } 7174 } 7175 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 7176 } 7177 case OMPD_target_teams_distribute: 7178 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7179 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7180 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7181 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7182 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7183 ThreadLimitVal = 7184 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7185 } 7186 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 7187 case OMPD_target_parallel: 7188 case OMPD_target_parallel_for: 7189 case OMPD_target_parallel_for_simd: 7190 case OMPD_target_teams_distribute_parallel_for: 7191 case OMPD_target_teams_distribute_parallel_for_simd: { 7192 llvm::Value *CondVal = nullptr; 7193 // Handle if clause. If if clause present, the number of threads is 7194 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 7195 if (D.hasClausesOfKind<OMPIfClause>()) { 7196 const OMPIfClause *IfClause = nullptr; 7197 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7198 if (C->getNameModifier() == OMPD_unknown || 7199 C->getNameModifier() == OMPD_parallel) { 7200 IfClause = C; 7201 break; 7202 } 7203 } 7204 if (IfClause) { 7205 const Expr *Cond = IfClause->getCondition(); 7206 bool Result; 7207 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7208 if (!Result) 7209 return Bld.getInt32(1); 7210 } else { 7211 CodeGenFunction::RunCleanupsScope Scope(CGF); 7212 CondVal = CGF.EvaluateExprAsBool(Cond); 7213 } 7214 } 7215 } 7216 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7217 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7218 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7219 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7220 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7221 ThreadLimitVal = 7222 Bld.CreateIntCast(ThreadLimit, 
CGF.Int32Ty, /*isSigned=*/false); 7223 } 7224 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7225 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7226 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7227 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7228 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7229 NumThreadsVal = 7230 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7231 ThreadLimitVal = ThreadLimitVal 7232 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7233 ThreadLimitVal), 7234 NumThreadsVal, ThreadLimitVal) 7235 : NumThreadsVal; 7236 } 7237 if (!ThreadLimitVal) 7238 ThreadLimitVal = Bld.getInt32(0); 7239 if (CondVal) 7240 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7241 return ThreadLimitVal; 7242 } 7243 case OMPD_target_teams_distribute_simd: 7244 case OMPD_target_simd: 7245 return Bld.getInt32(1); 7246 case OMPD_parallel: 7247 case OMPD_for: 7248 case OMPD_parallel_for: 7249 case OMPD_parallel_master: 7250 case OMPD_parallel_sections: 7251 case OMPD_for_simd: 7252 case OMPD_parallel_for_simd: 7253 case OMPD_cancel: 7254 case OMPD_cancellation_point: 7255 case OMPD_ordered: 7256 case OMPD_threadprivate: 7257 case OMPD_allocate: 7258 case OMPD_task: 7259 case OMPD_simd: 7260 case OMPD_sections: 7261 case OMPD_section: 7262 case OMPD_single: 7263 case OMPD_master: 7264 case OMPD_critical: 7265 case OMPD_taskyield: 7266 case OMPD_barrier: 7267 case OMPD_taskwait: 7268 case OMPD_taskgroup: 7269 case OMPD_atomic: 7270 case OMPD_flush: 7271 case OMPD_depobj: 7272 case OMPD_teams: 7273 case OMPD_target_data: 7274 case OMPD_target_exit_data: 7275 case OMPD_target_enter_data: 7276 case OMPD_distribute: 7277 case OMPD_distribute_simd: 7278 case OMPD_distribute_parallel_for: 7279 case OMPD_distribute_parallel_for_simd: 7280 case OMPD_teams_distribute: 7281 case OMPD_teams_distribute_simd: 7282 case OMPD_teams_distribute_parallel_for: 7283 case 
OMPD_teams_distribute_parallel_for_simd: 7284 case OMPD_target_update: 7285 case OMPD_declare_simd: 7286 case OMPD_declare_variant: 7287 case OMPD_declare_target: 7288 case OMPD_end_declare_target: 7289 case OMPD_declare_reduction: 7290 case OMPD_declare_mapper: 7291 case OMPD_taskloop: 7292 case OMPD_taskloop_simd: 7293 case OMPD_master_taskloop: 7294 case OMPD_master_taskloop_simd: 7295 case OMPD_parallel_master_taskloop: 7296 case OMPD_parallel_master_taskloop_simd: 7297 case OMPD_requires: 7298 case OMPD_unknown: 7299 break; 7300 } 7301 llvm_unreachable("Unsupported directive kind."); 7302 } 7303 7304 namespace { 7305 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7306 7307 // Utility to handle information from clauses associated with a given 7308 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7309 // It provides a convenient interface to obtain the information and generate 7310 // code for that information. 7311 class MappableExprsHandler { 7312 public: 7313 /// Values for bit flags used to specify the mapping type for 7314 /// offloading. 7315 enum OpenMPOffloadMappingFlags : uint64_t { 7316 /// No flags 7317 OMP_MAP_NONE = 0x0, 7318 /// Allocate memory on the device and move data from host to device. 7319 OMP_MAP_TO = 0x01, 7320 /// Allocate memory on the device and move data from device to host. 7321 OMP_MAP_FROM = 0x02, 7322 /// Always perform the requested mapping action on the element, even 7323 /// if it was already mapped before. 7324 OMP_MAP_ALWAYS = 0x04, 7325 /// Delete the element from the device environment, ignoring the 7326 /// current reference count associated with the element. 7327 OMP_MAP_DELETE = 0x08, 7328 /// The element being mapped is a pointer-pointee pair; both the 7329 /// pointer and the pointee should be mapped. 7330 OMP_MAP_PTR_AND_OBJ = 0x10, 7331 /// This flags signals that the base address of an entry should be 7332 /// passed to the target kernel as an argument. 
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  /// Counts the trailing zero bits of the mask, i.e. 48 for the current
  /// 0xffff000000000000 value.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Information gathered for a single mappable item: its expression
  /// components, map type and modifiers, whether its device pointer must be
  /// returned, and whether the map is implicit.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Compute the size in bytes that must be mapped for expression \p E,
  /// taking OpenMP array sections (including partial [lb:] sections) into
  /// account.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Clamp to zero when lb*elemsize exceeds the base size so the NUW
      // subtraction below cannot wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
        != MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
        != MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refer to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happen to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more that size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    // Components are stored leaf-first, so iterate in reverse.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                         .getAddress(CGF);

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB points at the last byte of the mapped type (byte-granular GEP
          // through a void* cast).
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            // Continue mapping right after the overlapped element just
            // processed.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
8021 if (IsFinalArraySection) 8022 break; 8023 8024 // The pointer becomes the base for the next element. 8025 if (Next != CE) 8026 BP = LB; 8027 8028 IsExpressionFirstInfo = false; 8029 IsCaptureFirstInfo = false; 8030 } 8031 } 8032 } 8033 8034 /// Return the adjusted map modifiers if the declaration a capture refers to 8035 /// appears in a first-private clause. This is expected to be used only with 8036 /// directives that start with 'target'. 8037 MappableExprsHandler::OpenMPOffloadMappingFlags 8038 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8039 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8040 8041 // A first private variable captured by reference will use only the 8042 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8043 // declaration is known as first-private in this handler. 8044 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8045 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 8046 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 8047 return MappableExprsHandler::OMP_MAP_ALWAYS | 8048 MappableExprsHandler::OMP_MAP_TO; 8049 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8050 return MappableExprsHandler::OMP_MAP_TO | 8051 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8052 return MappableExprsHandler::OMP_MAP_PRIVATE | 8053 MappableExprsHandler::OMP_MAP_TO; 8054 } 8055 return MappableExprsHandler::OMP_MAP_TO | 8056 MappableExprsHandler::OMP_MAP_FROM; 8057 } 8058 8059 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8060 // Rotate by getFlagMemberOffset() bits. 
8061 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8062 << getFlagMemberOffset()); 8063 } 8064 8065 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8066 OpenMPOffloadMappingFlags MemberOfFlag) { 8067 // If the entry is PTR_AND_OBJ but has not been marked with the special 8068 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8069 // marked as MEMBER_OF. 8070 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8071 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8072 return; 8073 8074 // Reset the placeholder value to prepare the flag for the assignment of the 8075 // proper MEMBER_OF value. 8076 Flags &= ~OMP_MAP_MEMBER_OF; 8077 Flags |= MemberOfFlag; 8078 } 8079 8080 void getPlainLayout(const CXXRecordDecl *RD, 8081 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8082 bool AsBase) const { 8083 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8084 8085 llvm::StructType *St = 8086 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8087 8088 unsigned NumElements = St->getNumElements(); 8089 llvm::SmallVector< 8090 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8091 RecordLayout(NumElements); 8092 8093 // Fill bases. 8094 for (const auto &I : RD->bases()) { 8095 if (I.isVirtual()) 8096 continue; 8097 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8098 // Ignore empty bases. 8099 if (Base->isEmpty() || CGF.getContext() 8100 .getASTRecordLayout(Base) 8101 .getNonVirtualSize() 8102 .isZero()) 8103 continue; 8104 8105 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8106 RecordLayout[FieldIndex] = Base; 8107 } 8108 // Fill in virtual bases. 8109 for (const auto &I : RD->vbases()) { 8110 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8111 // Ignore empty bases. 
8112 if (Base->isEmpty()) 8113 continue; 8114 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8115 if (RecordLayout[FieldIndex]) 8116 continue; 8117 RecordLayout[FieldIndex] = Base; 8118 } 8119 // Fill in all the fields. 8120 assert(!RD->isUnion() && "Unexpected union."); 8121 for (const auto *Field : RD->fields()) { 8122 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8123 // will fill in later.) 8124 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8125 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8126 RecordLayout[FieldIndex] = Field; 8127 } 8128 } 8129 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8130 &Data : RecordLayout) { 8131 if (Data.isNull()) 8132 continue; 8133 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8134 getPlainLayout(Base, Layout, /*AsBase=*/true); 8135 else 8136 Layout.push_back(Data.get<const FieldDecl *>()); 8137 } 8138 } 8139 8140 public: 8141 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8142 : CurDir(&Dir), CGF(CGF) { 8143 // Extract firstprivate clause information. 8144 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8145 for (const auto *D : C->varlists()) 8146 FirstPrivateDecls.try_emplace( 8147 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8148 // Extract device pointer clause information. 8149 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8150 for (auto L : C->component_lists()) 8151 DevPointersMap[L.first].push_back(L.second); 8152 } 8153 8154 /// Constructor for the declare mapper directive. 8155 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8156 : CurDir(&Dir), CGF(CGF) {} 8157 8158 /// Generate code for the combined entry if we have a partially mapped struct 8159 /// and take care of the mapping flags of the arguments corresponding to 8160 /// individual struct members. 
8161 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 8162 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8163 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 8164 const StructRangeInfoTy &PartialStruct) const { 8165 // Base is the base of the struct 8166 BasePointers.push_back(PartialStruct.Base.getPointer()); 8167 // Pointer is the address of the lowest element 8168 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 8169 Pointers.push_back(LB); 8170 // Size is (addr of {highest+1} element) - (addr of lowest element) 8171 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 8172 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8173 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8174 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8175 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8176 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8177 /*isSigned=*/false); 8178 Sizes.push_back(Size); 8179 // Map type is always TARGET_PARAM 8180 Types.push_back(OMP_MAP_TARGET_PARAM); 8181 // Remove TARGET_PARAM flag from the first element 8182 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8183 8184 // All other current entries will be MEMBER_OF the combined entry 8185 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8186 // 0xFFFF in the MEMBER_OF field). 8187 OpenMPOffloadMappingFlags MemberOfFlag = 8188 getMemberOfFlag(BasePointers.size() - 1); 8189 for (auto &M : CurTypes) 8190 setCorrectMemberOfFlag(M, MemberOfFlag); 8191 } 8192 8193 /// Generate all the base pointers, section pointers, sizes and map 8194 /// types for the extracted mappable expressions. Also, for each item that 8195 /// relates with a device pointer, a pair of the relevant declaration and 8196 /// index where it occurs is appended to the device pointers info array. 
  /// Collect map/to/from/use_device_ptr clause info for the current
  /// executable directive and emit the per-declaration base pointers,
  /// pointers, sizes and flags.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Components of "*this" are filed under the null declaration.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // 'map' clauses carry their own map type; 'to'/'from' clauses are
    // recorded with the corresponding fixed map type and no modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Not a struct member: emit the zero-size RETURN_PARAM entry
          // immediately.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Generate all the base pointers, section pointers, sizes and map types for
  /// the extracted map clauses of user-defined mapper.
  void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                                MapValuesArrayTy &Pointers,
                                MapValuesArrayTy &Sizes,
                                MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // A declare mapper only carries map clauses.
    for (const auto *C : CurMapperDir->clauselists()) {
      const auto *MC = cast<OMPMapClause>(C);
      for (const auto L : MC->component_lists()) {
        InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, MC->isImplicit());
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);
        IsFirstComponentList = false;
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      Types.append(CurTypes.begin(), CurTypes.end());
    }
  }

  /// Emit capture info for lambdas for variables captured by reference.
  /// Emit map entries for the fields of a lambda closure object captured by
  /// reference (the 'this' capture and by-reference/pointer captures), and
  /// record each field address -> lambda address pair in \a LambdaPointers
  /// so MEMBER_OF indices can be fixed up later.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only act on captures whose type is a lambda closure record.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map the captured 'this' pointer, if any.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Note: this VD intentionally shadows the parameter; it is the
      // captured variable of the current capture.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need mapping.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer (by value): map the loaded pointer with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures. Entries
      // with exactly this flag combination were produced by
      // generateInfoForLambdaCaptures above.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the entry whose pointer is the lambda itself.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component. A '*this' capture is represented by a null declaration.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Gather every map-clause component list that refers to this capture.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare every pair of component lists once; a pair overlaps when one
    // list's components are a prefix of the other's (walking from the base).
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The shorter list is the base; the longer one is the sub-element.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Compute the record layout once so fields can be ordered by their
      // position within the struct.
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two component lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by field position in the record layout.
            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated with the declare target link variables.
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->component_lists()) {
        // Skip component lists without an associated declaration, or whose
        // declaration is not a variable.
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        // Only 'declare target link' variables need explicit map entries
        // here; under the 'requires unified_shared_memory' mode no mapping is
        // emitted at all.
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        // A link variable maps as a single entity, so no partial struct info
        // should ever be produced for it.
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  /// Appends exactly one entry to each of \a CurBasePointers, \a CurPointers,
  /// \a CurSizes and \a CurMapTypes.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' capture: map the whole pointee object tofrom.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // A firstprivate clause on the directive overrides the implicitness of
      // this map entry.
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // A constant firstprivate variable is promoted to a global copy; the
        // runtime then maps the global instead of the stack location.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer captured by reference: load the pointer value
          // through the reference so the pointee address itself is mapped.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers and pointers always vary per invocation, so they live in
    // stack temporaries filled element-by-element below.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store each base pointer, pointer and (if runtime-evaluated) size into
    // its slot of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      // Cast the slot to the element's pointer type so the store type-checks.
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Remember where a use_device_ptr declaration's address was stored, if
      // the caller asked for that information.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    // No mapped data: pass null pointers of the expected argument types.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// Check for inner distribute directive.
8989 static const OMPExecutableDirective * 8990 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8991 const auto *CS = D.getInnermostCapturedStmt(); 8992 const auto *Body = 8993 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8994 const Stmt *ChildStmt = 8995 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8996 8997 if (const auto *NestedDir = 8998 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8999 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9000 switch (D.getDirectiveKind()) { 9001 case OMPD_target: 9002 if (isOpenMPDistributeDirective(DKind)) 9003 return NestedDir; 9004 if (DKind == OMPD_teams) { 9005 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9006 /*IgnoreCaptured=*/true); 9007 if (!Body) 9008 return nullptr; 9009 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9010 if (const auto *NND = 9011 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9012 DKind = NND->getDirectiveKind(); 9013 if (isOpenMPDistributeDirective(DKind)) 9014 return NND; 9015 } 9016 } 9017 return nullptr; 9018 case OMPD_target_teams: 9019 if (isOpenMPDistributeDirective(DKind)) 9020 return NestedDir; 9021 return nullptr; 9022 case OMPD_target_parallel: 9023 case OMPD_target_simd: 9024 case OMPD_target_parallel_for: 9025 case OMPD_target_parallel_for_simd: 9026 return nullptr; 9027 case OMPD_target_teams_distribute: 9028 case OMPD_target_teams_distribute_simd: 9029 case OMPD_target_teams_distribute_parallel_for: 9030 case OMPD_target_teams_distribute_parallel_for_simd: 9031 case OMPD_parallel: 9032 case OMPD_for: 9033 case OMPD_parallel_for: 9034 case OMPD_parallel_master: 9035 case OMPD_parallel_sections: 9036 case OMPD_for_simd: 9037 case OMPD_parallel_for_simd: 9038 case OMPD_cancel: 9039 case OMPD_cancellation_point: 9040 case OMPD_ordered: 9041 case OMPD_threadprivate: 9042 case OMPD_allocate: 9043 case OMPD_task: 9044 case OMPD_simd: 9045 case OMPD_sections: 9046 
case OMPD_section: 9047 case OMPD_single: 9048 case OMPD_master: 9049 case OMPD_critical: 9050 case OMPD_taskyield: 9051 case OMPD_barrier: 9052 case OMPD_taskwait: 9053 case OMPD_taskgroup: 9054 case OMPD_atomic: 9055 case OMPD_flush: 9056 case OMPD_depobj: 9057 case OMPD_teams: 9058 case OMPD_target_data: 9059 case OMPD_target_exit_data: 9060 case OMPD_target_enter_data: 9061 case OMPD_distribute: 9062 case OMPD_distribute_simd: 9063 case OMPD_distribute_parallel_for: 9064 case OMPD_distribute_parallel_for_simd: 9065 case OMPD_teams_distribute: 9066 case OMPD_teams_distribute_simd: 9067 case OMPD_teams_distribute_parallel_for: 9068 case OMPD_teams_distribute_parallel_for_simd: 9069 case OMPD_target_update: 9070 case OMPD_declare_simd: 9071 case OMPD_declare_variant: 9072 case OMPD_declare_target: 9073 case OMPD_end_declare_target: 9074 case OMPD_declare_reduction: 9075 case OMPD_declare_mapper: 9076 case OMPD_taskloop: 9077 case OMPD_taskloop_simd: 9078 case OMPD_master_taskloop: 9079 case OMPD_master_taskloop_simd: 9080 case OMPD_parallel_master_taskloop: 9081 case OMPD_parallel_master_taskloop_simd: 9082 case OMPD_requires: 9083 case OMPD_unknown: 9084 llvm_unreachable("Unexpected directive."); 9085 } 9086 } 9087 9088 return nullptr; 9089 } 9090 9091 /// Emit the user-defined mapper function. The code generation follows the 9092 /// pattern in the example below. 9093 /// \code 9094 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9095 /// void *base, void *begin, 9096 /// int64_t size, int64_t type) { 9097 /// // Allocate space for an array section first. 9098 /// if (size > 1 && !maptype.IsDelete) 9099 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9100 /// size*sizeof(Ty), clearToFrom(type)); 9101 /// // Map members. 
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The mangled type name makes the mapper symbol unique per mapped type.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // Allow the mapper body to be optimized.
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // The current-element pointer; the loop latch adds a second incoming value
  // at the end of this function.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
  // Shift into the MEMBER_OF bit-field position for later combination.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    // The 'tofrom' case falls through from ToElseBB with the type unchanged.
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the mapper function, and record it against the current function so
  // it can be referenced later.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Only array sections (size >= 1) take this path; otherwise skip to ExitBB.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    // Initialization runs only when the delete bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // Deletion runs only when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
}

/// Push the trip count of the (possibly nested) distribute loop to the device
/// ahead of the target region so the runtime can pick the launch parameters.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  // No distribute loop found: nothing to push.
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *Args[] = {DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause forces the target invocation to be wrapped in an
  // outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //    'target teams'
    //    'target' / 'teams'
    //    'target teams distribute parallel for'
    //    'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value from the offloading call means the device
    // launch failed and we must fall back to the host outlined function.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside the outer task the captures must be re-materialized in the
      // task's context before calling the host version.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk captures, captured record fields and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively scan \p S for OpenMP target execution directives and emit the
/// corresponding device functions; \p ParentName is used to build the unique
/// target region entry names.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // All remaining directive kinds are not target execution directives and
    // cannot reach here (guarded by RequiresDeviceCodegen above).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Returns true if the given function should NOT be emitted through the
/// normal host/device code generation path.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

/// Returns true if the given global variable should NOT be emitted through
/// the normal code generation path (device compilation only).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit variable if it is not marked as declare target. Link-clause
  // variables and to-clause variables under unified shared memory are
  // deferred (see emitDeferredTargetDecls).
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

/// Create (or reuse) an internal global holding the firstprivate copy of the
/// constant variable \p VD and register it as an offload entry of kind
/// "to". Returns the address of the created global.
llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    // Build a unique name from the device/file/line of the declaration.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  // Keep the global alive even if it has no IR uses yet.
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}

/// Register a declare-target global variable with the offload entries
/// manager so the device/host tables stay in sync.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      // Declaration only: size is unknown here.
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Emit an artificial "<name>_ref" constant pointing at the variable and
      // mark it compiler-used so the variable cannot be optimized away.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host the entry refers to the indirection pointer, not to the
      // variable itself.
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

/// Returns true if the given global (function or variable) should NOT go
/// through the normal code generation path.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

/// Emit the declare-target variables whose emission was deferred by
/// emitTargetGlobalVariable.
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      // Link-clause (or to-with-unified-memory) variables are accessed
      // through an indirection pointer; make sure it exists.
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

/// Default implementation does nothing beyond checking the directive kind;
/// target-specific runtimes may override to adjust lambda-related data.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

/// Record the effects of an OpenMP 'requires' directive: unified shared
/// memory and the default atomic memory ordering.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

/// If \p VD carries an 'omp allocate' attribute with a predefined allocator,
/// set \p AS to the corresponding address space and return true.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    // All predefined allocators map to the default address space here;
    // target-specific runtimes may refine this.
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

/// RAII that temporarily disables automatic globalization of declarations
/// during device compilation; restores the previous state on destruction.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Create the global constructor-like function that registers the 'requires'
/// flags with the runtime, or return nullptr when not needed.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // A zero value tells the runtime to use its defaults.
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID, PointerNum, BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
10328 llvm::Value *DeviceID = nullptr; 10329 if (Device) { 10330 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10331 CGF.Int64Ty, /*isSigned=*/true); 10332 } else { 10333 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10334 } 10335 10336 // Emit the number of elements in the offloading arrays. 10337 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10338 10339 llvm::Value *OffloadingArgs[] = { 10340 DeviceID, PointerNum, BasePointersArrayArg, 10341 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10342 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10343 OffloadingArgs); 10344 }; 10345 10346 // If we need device pointer privatization, we need to emit the body of the 10347 // region with no privatization in the 'else' branch of the conditional. 10348 // Otherwise, we don't have to do anything. 10349 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10350 PrePostActionTy &) { 10351 if (!Info.CaptureDeviceAddrMap.empty()) { 10352 CodeGen.setAction(NoPrivAction); 10353 CodeGen(CGF); 10354 } 10355 }; 10356 10357 // We don't have to do anything to close the region if the if clause evaluates 10358 // to false. 10359 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10360 10361 if (IfCond) { 10362 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10363 } else { 10364 RegionCodeGenTy RCG(BeginThenGen); 10365 RCG(CGF); 10366 } 10367 10368 // If we don't require privatization of device pointers, we emit the body in 10369 // between the runtime calls. This avoids duplicating the body code. 
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emit the single runtime call for a standalone target data directive
/// ('target enter data', 'target exit data' or 'target update'), selecting the
/// (nowait) runtime entry point that matches the directive kind.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive. The switch is exhaustive over OpenMPDirectiveKind so that
    // adding a new directive forces a decision here.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // A depend clause turns the directive into a task-based one.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

/// Compute the size in bits of the "characteristic data type" (CDT) of \p FD,
/// used to derive the vector length when no simdlen clause is given. Returns 0
/// for a null return type.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function`s "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    // For instance methods the implicit 'this' occupies the first attribute
    // slot; treat it as a pointer-to-record parameter.
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Attach the x86 vector-variant mangled names (Vector Function ABI,
/// "_ZGV<isa><mask><vlen><params>_<name>") as function attributes, one per
/// (mask, ISA) combination.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No simdlen clause: derive VLEN from the characteristic data type.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (!!ParamAttr.StrideOrArg)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex within 16 bytes wide set PVB to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  // A maps-to-vector pointer whose pointee is pass-by-value uses the pointee
  // size as its lane size.
  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  // Everything else (references, aggregates, ...) is handled via a pointer.
  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI. The third tuple element reports whether the return value
// becomes an additional input parameter of the vector variant.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
10822 template <typename T> 10823 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10824 char ISA, StringRef ParSeq, 10825 StringRef MangledName, bool OutputBecomesInput, 10826 llvm::Function *Fn) { 10827 SmallString<256> Buffer; 10828 llvm::raw_svector_ostream Out(Buffer); 10829 Out << Prefix << ISA << LMask << VLEN; 10830 if (OutputBecomesInput) 10831 Out << "v"; 10832 Out << ParSeq << "_" << MangledName; 10833 Fn->addFnAttr(Out.str()); 10834 } 10835 10836 // Helper function to generate the Advanced SIMD names depending on 10837 // the value of the NDS when simdlen is not present. 10838 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10839 StringRef Prefix, char ISA, 10840 StringRef ParSeq, StringRef MangledName, 10841 bool OutputBecomesInput, 10842 llvm::Function *Fn) { 10843 switch (NDS) { 10844 case 8: 10845 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10846 OutputBecomesInput, Fn); 10847 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10848 OutputBecomesInput, Fn); 10849 break; 10850 case 16: 10851 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10852 OutputBecomesInput, Fn); 10853 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10854 OutputBecomesInput, Fn); 10855 break; 10856 case 32: 10857 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10858 OutputBecomesInput, Fn); 10859 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10860 OutputBecomesInput, Fn); 10861 break; 10862 case 64: 10863 case 128: 10864 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10865 OutputBecomesInput, Fn); 10866 break; 10867 default: 10868 llvm_unreachable("Scalar type is too wide."); 10869 } 10870 } 10871 10872 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

/// Process all 'declare simd' attributes attached to any redeclaration of
/// \p FD and attach the corresponding vector-variant mangled names to \p Fn.
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl. For methods, slot 0 is
  // the implicit 'this' (keyed by the function decl itself).
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  // Walk every redeclaration; each may carry its own declare-simd attributes.
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // Mark aligned parameters: for each 'aligned' clause entry record the
      // required alignment — the explicit expression if present, otherwise
      // the target's default simd alignment for the parameter type.
      auto NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto SI = Attr->steps_begin();
      auto MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        ParamAttr.Kind = Linear;
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            // Non-constant step: expect a reference to another parameter that
            // carries the stride at run time.
            // NOTE(review): cast<> asserts and never returns null, so these
            // null-check patterns cannot take the false branch; dyn_cast<>
            // looks intended here — confirm against upstream.
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }

      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      // Emit the vector-variant mangled names on targets that define a vector
      // function ABI (x86 and AArch64 here).
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
/// Emits the saved __kmpc_doacross_fini call (loc, gtid) when the region is
/// left, on both the normal and the EH path.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emits the __kmpc_doacross_init call for an ordered loop nest: builds an
/// array of kmp_dim {lo, up, st} descriptors (one per dimension in
/// \p NumIterations), fills upper bound and stride, calls the runtime, and
/// pushes a cleanup emitting the matching __kmpc_doacross_fini.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data. Lower bounds stay zero (from the null init above).
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emits __kmpc_doacross_post (for 'depend(source)') or __kmpc_doacross_wait
/// (for 'depend(sink:...)'): stores the per-loop dependence vector, converted
/// to kmp_int64, into a temporary array and passes it to the runtime.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emits a call to \p Callee at \p Loc, using a plain (nounwind) call when the
/// callee is known not to throw.
void CGOpenMPRuntime::emitCall(CodeGenFunction
                               &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  // A known-nounwind callee can be emitted as a plain call; no invoke/landing
  // pad is needed.
  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Default lowering of a call to an outlined OpenMP region: a direct call.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Records that a 'declare target' function has been emitted in this module.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Host-side default: native parameters need no translation, so the native
/// parameter's own local address is returned (TargetParam is unused here).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

namespace {
/// Cleanup action for allocate support.
/// Emits the saved __kmpc_free(gtid, addr, allocator) call when the
/// variable's scope is left.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// For a variable carrying OMPAllocateDeclAttr, allocates its storage through
/// __kmpc_alloc with the attribute's allocator, pushes a matching __kmpc_free
/// cleanup, and returns the address cast to the variable's type. Returns
/// Address::invalid() when default (ordinary) allocation should be used.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: the size is only known at run time, so round it up in IR.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.

  int BestMatchIdx = getBestVariantMatchForContext(VMIs, Ctx);
  if (BestMatchIdx < 0)
    return FD;

  // The variant function reference is a DeclRefExpr naming the replacement.
  return cast<FunctionDecl>(
      cast<DeclRefExpr>(VariantExprs[BestMatchIdx]->IgnoreParenImpCasts())
          ->getDecl());
}

/// Replaces the function to be emitted for \p GD with its best-matching
/// 'declare variant' replacement, if any. Returns true when a variant is (or
/// is deferred to be) emitted instead of the original function.
bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) {
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // If the original function is defined already, use its definition.
  StringRef MangledName = CGM.getMangledName(GD);
  llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName);
  if (Orig && !Orig->isDeclaration())
    return false;
  const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D);
  // Emit original function if it does not have declare variant attribute or the
  // context does not match.
  if (NewFD == D)
    return false;
  GlobalDecl NewGD = GD.getWithDecl(NewFD);
  if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) {
    DeferredVariantFunction.erase(D);
    return true;
  }
  // Could not emit yet; remember the mapping so it can be retried later.
  DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD)));
  return true;
}

/// Pushes the set of declarations named in 'nontemporal' clauses of \p S onto
/// the runtime's NontemporalDeclsStack (popped by the destructor).
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Member of the current class (implicit or explicit 'this' base).
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

/// Returns true if \p VD is listed in any currently active 'nontemporal'
/// clause scope.
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

/// Collects, into \p NeedToAddForLPCsAsDisabled, the declarations for which
/// lastprivate-conditional analysis must be disabled inside \p S: variables
/// captured by target/task regions, and variables privatized by S's
/// private/firstprivate/lastprivate/reduction/linear clauses that are tracked
/// by an enclosing (non-disabled) lastprivate-conditional region.
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Any candidate tracked by an enclosing, still-enabled region must have its
  // analysis disabled within S.
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

/// Pushes a new lastprivate-conditional tracking region for directive \p S if
/// it carries any lastprivate(conditional:) clause (OpenMP >= 5.0); records
/// the unique global name for each tracked variable, the loop IV lvalue, and
/// the owning function.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

/// Constructor used by disable(): pushes a Disabled entry listing the decls
/// whose lastprivate-conditional analysis must be suppressed within \p S.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

/// Creates (or reuses) the per-function wrapper record { value; char Fired; }
/// for a lastprivate(conditional:) variable \p VD, zero-initializes the Fired
/// flag, and returns the address of the value field for use as the private
/// copy.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // NOTE(review): "lasprivate" is a typo, but it is part of the implicit
    // record's name and hence of emitted IR type names; renaming it changes
    // generated IR.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Fired flag starts at 0 ("no conditional update happened yet").
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  // Not set anywhere in this class as visible here.
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search enclosing regions innermost-first; a Disabled entry suppresses
    // tracking of the decl entirely.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const
             CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      // Only glvalue sub-expressions can name a tracked variable as a store
      // target; skip the rest.
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

/// Emits the "update the global last value if this iteration is not older"
/// logic for a lastprivate(conditional:) variable, guarded by a critical
/// region named \p UniqueDeclName (except in simd-only mode, where no
/// parallelism exists).
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal,
                             LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

/// If \p LHS refers to a tracked lastprivate(conditional:) variable, emits the
/// bookkeeping for the store: sets the Fired flag atomically when the store
/// happens in an inner outlined region, otherwise updates the global last
/// value directly.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

/// At the end of a region, checks every tracked lastprivate(conditional:)
/// variable's Fired flag and, when set, propagates the private value into the
/// global last value via emitLastprivateConditionalUpdate.
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  // Only the innermost non-disabled region owned by the current function is
  // processed here.
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    // NOTE(review): "rehistered" is a typo for "registered" in this message.
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be rehistered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}

/// After the loop, copies the global last value (maintained by the update
/// helpers under UniqueDeclName) back into the original variable, if any
/// update actually fired.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}

//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: runtime used for simd-only compilation. Every entry
// point below that would require the full OpenMP runtime is unreachable by
// construction in this mode.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *>
Vars, 12026 SourceLocation Loc, 12027 llvm::AtomicOrdering AO) { 12028 llvm_unreachable("Not supported in SIMD-only mode"); 12029 } 12030 12031 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12032 const OMPExecutableDirective &D, 12033 llvm::Function *TaskFunction, 12034 QualType SharedsTy, Address Shareds, 12035 const Expr *IfCond, 12036 const OMPTaskDataTy &Data) { 12037 llvm_unreachable("Not supported in SIMD-only mode"); 12038 } 12039 12040 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12041 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12042 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12043 const Expr *IfCond, const OMPTaskDataTy &Data) { 12044 llvm_unreachable("Not supported in SIMD-only mode"); 12045 } 12046 12047 void CGOpenMPSIMDRuntime::emitReduction( 12048 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12049 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12050 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12051 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12052 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12053 ReductionOps, Options); 12054 } 12055 12056 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12057 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12058 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12059 llvm_unreachable("Not supported in SIMD-only mode"); 12060 } 12061 12062 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12063 SourceLocation Loc, 12064 ReductionCodeGen &RCG, 12065 unsigned N) { 12066 llvm_unreachable("Not supported in SIMD-only mode"); 12067 } 12068 12069 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12070 SourceLocation Loc, 12071 llvm::Value *ReductionsPtr, 12072 LValue SharedLVal) { 12073 llvm_unreachable("Not supported in 
SIMD-only mode"); 12074 } 12075 12076 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12077 SourceLocation Loc) { 12078 llvm_unreachable("Not supported in SIMD-only mode"); 12079 } 12080 12081 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12082 CodeGenFunction &CGF, SourceLocation Loc, 12083 OpenMPDirectiveKind CancelRegion) { 12084 llvm_unreachable("Not supported in SIMD-only mode"); 12085 } 12086 12087 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12088 SourceLocation Loc, const Expr *IfCond, 12089 OpenMPDirectiveKind CancelRegion) { 12090 llvm_unreachable("Not supported in SIMD-only mode"); 12091 } 12092 12093 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12094 const OMPExecutableDirective &D, StringRef ParentName, 12095 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12096 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12097 llvm_unreachable("Not supported in SIMD-only mode"); 12098 } 12099 12100 void CGOpenMPSIMDRuntime::emitTargetCall( 12101 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12102 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12103 const Expr *Device, 12104 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12105 const OMPLoopDirective &D)> 12106 SizeEmitter) { 12107 llvm_unreachable("Not supported in SIMD-only mode"); 12108 } 12109 12110 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12111 llvm_unreachable("Not supported in SIMD-only mode"); 12112 } 12113 12114 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12115 llvm_unreachable("Not supported in SIMD-only mode"); 12116 } 12117 12118 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12119 return false; 12120 } 12121 12122 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12123 const OMPExecutableDirective &D, 12124 SourceLocation Loc, 12125 llvm::Function *OutlinedFn, 12126 ArrayRef<llvm::Value *> CapturedVars) { 12127 
llvm_unreachable("Not supported in SIMD-only mode"); 12128 } 12129 12130 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12131 const Expr *NumTeams, 12132 const Expr *ThreadLimit, 12133 SourceLocation Loc) { 12134 llvm_unreachable("Not supported in SIMD-only mode"); 12135 } 12136 12137 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12138 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12139 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12140 llvm_unreachable("Not supported in SIMD-only mode"); 12141 } 12142 12143 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12144 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12145 const Expr *Device) { 12146 llvm_unreachable("Not supported in SIMD-only mode"); 12147 } 12148 12149 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12150 const OMPLoopDirective &D, 12151 ArrayRef<Expr *> NumIterations) { 12152 llvm_unreachable("Not supported in SIMD-only mode"); 12153 } 12154 12155 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12156 const OMPDependClause *C) { 12157 llvm_unreachable("Not supported in SIMD-only mode"); 12158 } 12159 12160 const VarDecl * 12161 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12162 const VarDecl *NativeParam) const { 12163 llvm_unreachable("Not supported in SIMD-only mode"); 12164 } 12165 12166 Address 12167 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12168 const VarDecl *NativeParam, 12169 const VarDecl *TargetParam) const { 12170 llvm_unreachable("Not supported in SIMD-only mode"); 12171 } 12172