//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
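        // A sketch of the control flow emitted here for an untied task (an
        // illustration, not literal IR): the task entry loads its part id and
        // switches on it, so that each re-invocation of the task resumes
        // right after the last completed part:
        //   switch (*part_id) {
        //   case 0: goto .untied.jmp.0;  // initial entry
        //   case 1: goto .untied.jmp.1;  // resume after the first switch point
        //   default: goto .untied.done.; // all parts finished
        //   }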
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up the
    // variable in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client; only the client has the
/// information needed to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
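    // Save and clear the lambda/block capture state of the enclosing function
    // so that code generated for the inlined region cannot accidentally
    // resolve captures through the enclosing lambda or block; the destructor
    // below restores it once emission of the construct is done.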
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined; the runtime should get it
  /// from environment variables, as described in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

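// Note on the psource field described below (an illustrative note): the
// runtime expects a string of the form ";file;function;line;column;;"; when
// no source location is available, clang emits ";unknown;unknown;0;0;;"
// (see emitUpdateLocation).
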
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
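
// For orientation, how 'schedule' clauses map onto these values (an
// illustrative sketch following the kmp.h encoding):
//   schedule(static)              -> OMP_sch_static
//   schedule(static, 4)           -> OMP_sch_static_chunked
//   schedule(dynamic)             -> OMP_sch_dynamic_chunked
//   schedule(monotonic: dynamic)  -> OMP_sch_dynamic_chunked
//                                    | OMP_sch_modifier_monotonic
// Ordered loops use the corresponding OMP_ord_* values instead.
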
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  OMPRTL__kmpc_taskred_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
  // is_ws, int num, void *data);
  OMPRTL__kmpc_taskred_modifier_init,
  // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  OMPRTL__kmpc_task_reduction_modifier_fini,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,
  // Call to omp_allocator_handle_t __kmpc_init_allocator(int gtid,
  // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]);
  OMPRTL__kmpc_init_allocator,
  // Call to void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);
  OMPRTL__kmpc_destroy_allocator,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
  // Call to kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
  // int gtid, kmp_task_t *task);
  OMPRTL__kmpc_task_allow_completion_event,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
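
// Example of how a RegionCodeGenTy with a pre/post action is typically driven
// (a minimal sketch, not code from this file):
//   auto &&CodeGen = [](CodeGenFunction &CGF, PrePostActionTy &Action) {
//     Action.Enter(CGF); // e.g. emit a guard such as __kmpc_master()
//     // ... emit the region body ...
//   };
//   RegionCodeGenTy RCG(CodeGen);
//   RCG(CGF); // runs Callback inside a RunCleanupsScope; when a pre/post
//             // action is set, its Exit() is pushed as a normal-and-EH
//             // cleanup (see CleanupTy above)
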
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for the reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

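// Note on the two emitAggregateType overloads below: for a variably modified
// private (e.g. a VLA 'T a[n]' or an array section), the element count and
// byte size are computed at run time and bound to the VLA size expression via
// an OpaqueValueMapping before EmitVariablyModifiedType (an explanatory note).
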
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

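// A reduction private of class type (e.g. one declared via a UDR over a type
// with a non-trivial destructor) must be destroyed at the end of the region;
// needCleanups() above detects this, and emitCleanups() below pushes the
// destructor call (an explanatory note).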
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
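  // Emitting the body inside a terminate scope enforces this: an exception
  // that tries to propagate out of the structured block reaches the terminate
  // handler instead of unwinding past the region's single exit.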
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}

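// For reference, the user-level construct that drives the combiner and
// initializer emission below (an illustrative example, not from this file):
//   #pragma omp declare reduction(plus : T : omp_out = omp_out + omp_in) \
//       initializer(omp_priv = T())
// The combiner is emitted as an internal ".omp_combiner." function whose two
// pointer parameters stand in for omp_out and omp_in; the initializer, when
// present, is emitted analogously as ".omp_initializer.".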
"omp_combiner" : "omp_initializer", ""}); 1343 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1344 Name, &CGM.getModule()); 1345 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1346 if (CGM.getLangOpts().Optimize) { 1347 Fn->removeFnAttr(llvm::Attribute::NoInline); 1348 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1349 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1350 } 1351 CodeGenFunction CGF(CGM); 1352 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1353 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1354 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1355 Out->getLocation()); 1356 CodeGenFunction::OMPPrivateScope Scope(CGF); 1357 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1358 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1359 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1360 .getAddress(CGF); 1361 }); 1362 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1363 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1364 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1365 .getAddress(CGF); 1366 }); 1367 (void)Scope.Privatize(); 1368 if (!IsCombiner && Out->hasInit() && 1369 !CGF.isTrivialInitializer(Out->getInit())) { 1370 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1371 Out->getType().getQualifiers(), 1372 /*IsInitializer=*/true); 1373 } 1374 if (CombinerInitializer) 1375 CGF.EmitIgnoredExpr(CombinerInitializer); 1376 Scope.ForceCleanup(); 1377 CGF.FinishFunction(); 1378 return Fn; 1379 } 1380 1381 void CGOpenMPRuntime::emitUserDefinedReduction( 1382 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1383 if (UDRMap.count(D) > 0) 1384 return; 1385 llvm::Function *Combiner = emitCombinerOrInitializer( 1386 CGM, D->getType(), D->getCombiner(), 1387 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1388 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1389 /*IsCombiner=*/true); 1390 llvm::Function *Initializer = nullptr; 1391 if (const Expr *Init = D->getInitializer()) { 1392 Initializer = emitCombinerOrInitializer( 1393 CGM, D->getType(), 1394 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1395 : nullptr, 1396 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1397 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1398 /*IsCombiner=*/false); 1399 } 1400 UDRMap.try_emplace(D, Combiner, Initializer); 1401 if (CGF) { 1402 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1403 Decls.second.push_back(D); 1404 } 1405 } 1406 1407 std::pair<llvm::Function *, llvm::Function *> 1408 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1409 auto I = UDRMap.find(D); 1410 if (I != UDRMap.end()) 1411 return I->second; 1412 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1413 return UDRMap.lookup(D); 1414 } 1415 1416 namespace { 1417 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1418 // Builder if one is present. 1419 struct PushAndPopStackRAII { 1420 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1421 bool HasCancel) 1422 : OMPBuilder(OMPBuilder) { 1423 if (!OMPBuilder) 1424 return; 1425 1426 // The following callback is the crucial part of clangs cleanup process. 
1427 // 1428 // NOTE: 1429 // Once the OpenMPIRBuilder is used to create parallel regions (and 1430 // similar), the cancellation destination (Dest below) is determined via 1431 // IP. That means if we have variables to finalize we split the block at IP, 1432 // use the new block (=BB) as destination to build a JumpDest (via 1433 // getJumpDestInCurrentScope(BB)) which then is fed to 1434 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1435 // to push & pop an FinalizationInfo object. 1436 // The FiniCB will still be needed but at the point where the 1437 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1438 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1439 assert(IP.getBlock()->end() == IP.getPoint() && 1440 "Clang CG should cause non-terminated block!"); 1441 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1442 CGF.Builder.restoreIP(IP); 1443 CodeGenFunction::JumpDest Dest = 1444 CGF.getOMPCancelDestination(OMPD_parallel); 1445 CGF.EmitBranchThroughCleanup(Dest); 1446 }; 1447 1448 // TODO: Remove this once we emit parallel regions through the 1449 // OpenMPIRBuilder as it can do this setup internally. 1450 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1451 {FiniCB, OMPD_parallel, HasCancel}); 1452 OMPBuilder->pushFinalizationCB(std::move(FI)); 1453 } 1454 ~PushAndPopStackRAII() { 1455 if (OMPBuilder) 1456 OMPBuilder->popFinalizationCB(); 1457 } 1458 llvm::OpenMPIRBuilder *OMPBuilder; 1459 }; 1460 } // namespace 1461 1462 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1463 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1464 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1465 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1466 assert(ThreadIDVar->getType()->isPointerType() && 1467 "thread id variable must be of type kmp_int32 *"); 1468 CodeGenFunction CGF(CGM, true); 1469 bool HasCancel = false; 1470 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1471 HasCancel = OPD->hasCancel(); 1472 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1473 HasCancel = OPD->hasCancel(); 1474 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1475 HasCancel = OPSD->hasCancel(); 1476 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1477 HasCancel = OPFD->hasCancel(); 1478 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1479 HasCancel = OPFD->hasCancel(); 1480 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1481 HasCancel = OPFD->hasCancel(); 1482 else if (const auto *OPFD = 1483 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1484 HasCancel = OPFD->hasCancel(); 1485 else if (const auto *OPFD = 1486 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1487 HasCancel = OPFD->hasCancel(); 1488 1489 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1490 // parallel region to make cancellation barriers work properly. 
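  // For illustration, the kind of region this matters for (a source-level
  // sketch; any local with a destructor would need finalization):
  //
  //   #pragma omp parallel
  //   {
  //     Obj O;                       // must be destroyed on cancellation
  //     #pragma omp cancel parallel  // needs a cancellation barrier
  //   }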
1491 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1492 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1493 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1494 HasCancel, OutlinedHelperName); 1495 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1496 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1497 } 1498 1499 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1500 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1501 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1502 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1503 return emitParallelOrTeamsOutlinedFunction( 1504 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1505 } 1506 1507 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1508 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1509 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1510 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1511 return emitParallelOrTeamsOutlinedFunction( 1512 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1513 } 1514 1515 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1516 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1517 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1518 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1519 bool Tied, unsigned &NumberOfParts) { 1520 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1521 PrePostActionTy &) { 1522 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1523 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1524 llvm::Value *TaskArgs[] = { 1525 UpLoc, ThreadID, 1526 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1527 TaskTVar->getType()->castAs<PointerType>()) 1528 .getPointer(CGF)}; 1529 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1530 }; 1531 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1532 UntiedCodeGen); 1533 CodeGen.setAction(Action); 1534 assert(!ThreadIDVar->getType()->isPointerType() && 1535 "thread id variable must be of type kmp_int32 for tasks"); 1536 const OpenMPDirectiveKind Region = 1537 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? 
OMPD_taskloop 1538 : OMPD_task; 1539 const CapturedStmt *CS = D.getCapturedStmt(Region); 1540 bool HasCancel = false; 1541 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1542 HasCancel = TD->hasCancel(); 1543 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1544 HasCancel = TD->hasCancel(); 1545 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1546 HasCancel = TD->hasCancel(); 1547 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1548 HasCancel = TD->hasCancel(); 1549 1550 CodeGenFunction CGF(CGM, true); 1551 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1552 InnermostKind, HasCancel, Action); 1553 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1554 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1555 if (!Tied) 1556 NumberOfParts = Action.getNumberOfParts(); 1557 return Res; 1558 } 1559 1560 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1561 const RecordDecl *RD, const CGRecordLayout &RL, 1562 ArrayRef<llvm::Constant *> Data) { 1563 llvm::StructType *StructTy = RL.getLLVMType(); 1564 unsigned PrevIdx = 0; 1565 ConstantInitBuilder CIBuilder(CGM); 1566 auto DI = Data.begin(); 1567 for (const FieldDecl *FD : RD->fields()) { 1568 unsigned Idx = RL.getLLVMFieldNo(FD); 1569 // Fill the alignment. 1570 for (unsigned I = PrevIdx; I < Idx; ++I) 1571 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1572 PrevIdx = Idx + 1; 1573 Fields.add(*DI); 1574 ++DI; 1575 } 1576 } 1577 1578 template <class... As> 1579 static llvm::GlobalVariable * 1580 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1581 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1582 As &&... Args) { 1583 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1584 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1585 ConstantInitBuilder CIBuilder(CGM); 1586 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1587 buildStructValue(Fields, CGM, RD, RL, Data); 1588 return Fields.finishAndCreateGlobal( 1589 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1590 std::forward<As>(Args)...); 1591 } 1592 1593 template <typename T> 1594 static void 1595 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1596 ArrayRef<llvm::Constant *> Data, 1597 T &Parent) { 1598 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1599 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1600 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1601 buildStructValue(Fields, CGM, RD, RL, Data); 1602 Fields.finishAndAddTo(Parent); 1603 } 1604 1605 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1606 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1607 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1608 FlagsTy FlagsKey(Flags, Reserved2Flags); 1609 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1610 if (!Entry) { 1611 if (!DefaultOpenMPPSource) { 1612 // Initialize default location for psource field of ident_t structure of 1613 // all ident_t objects. Format is ";file;function;line;column;;". 
1614 // Taken from 1615 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1616 DefaultOpenMPPSource = 1617 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1618 DefaultOpenMPPSource = 1619 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1620 } 1621 1622 llvm::Constant *Data[] = { 1623 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1624 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1625 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1626 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1627 llvm::GlobalValue *DefaultOpenMPLocation = 1628 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1629 llvm::GlobalValue::PrivateLinkage); 1630 DefaultOpenMPLocation->setUnnamedAddr( 1631 llvm::GlobalValue::UnnamedAddr::Global); 1632 1633 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1634 } 1635 return Address(Entry, Align); 1636 } 1637 1638 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1639 bool AtCurrentPoint) { 1640 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1641 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1642 1643 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1644 if (AtCurrentPoint) { 1645 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1646 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1647 } else { 1648 Elem.second.ServiceInsertPt = 1649 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1650 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1651 } 1652 } 1653 1654 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1655 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1656 if (Elem.second.ServiceInsertPt) { 1657 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1658 Elem.second.ServiceInsertPt = nullptr; 1659 Ptr->eraseFromParent(); 1660 } 1661 } 1662 1663 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1664 SourceLocation Loc, 1665 unsigned Flags) { 1666 Flags |= OMP_IDENT_KMPC; 1667 // If no debug info is generated - return global default location. 1668 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1669 Loc.isInvalid()) 1670 return getOrCreateDefaultLocation(Flags).getPointer(); 1671 1672 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1673 1674 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1675 Address LocValue = Address::invalid(); 1676 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1677 if (I != OpenMPLocThreadIDMap.end()) 1678 LocValue = Address(I->second.DebugLoc, Align); 1679 1680 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1681 // GetOpenMPThreadID was called before this routine. 
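  // Conceptually, the code emitted below corresponds to (a sketch, not the
  // literal IR):
  //
  //   ident_t .kmpc_loc.addr = <default ident_t for Flags>;           // memcpy
  //   .kmpc_loc.addr.psource = ";<file>;<function>;<line>;<column>;;"; // store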
1682 if (!LocValue.isValid()) { 1683 // Generate "ident_t .kmpc_loc.addr;" 1684 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1685 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1686 Elem.second.DebugLoc = AI.getPointer(); 1687 LocValue = AI; 1688 1689 if (!Elem.second.ServiceInsertPt) 1690 setLocThreadIdInsertPt(CGF); 1691 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1692 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1693 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1694 CGF.getTypeSize(IdentQTy)); 1695 } 1696 1697 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1698 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1699 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1700 LValue PSource = 1701 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1702 1703 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1704 if (OMPDebugLoc == nullptr) { 1705 SmallString<128> Buffer2; 1706 llvm::raw_svector_ostream OS2(Buffer2); 1707 // Build debug location 1708 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1709 OS2 << ";" << PLoc.getFilename() << ";"; 1710 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1711 OS2 << FD->getQualifiedNameAsString(); 1712 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1713 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1714 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1715 } 1716 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1717 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1718 1719 // Our callers always pass this to a runtime function, so for 1720 // convenience, go ahead and return a naked pointer. 1721 return LocValue.getPointer(); 1722 } 1723 1724 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1725 SourceLocation Loc) { 1726 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1727 1728 llvm::Value *ThreadID = nullptr; 1729 // Check whether we've already cached a load of the thread id in this 1730 // function. 1731 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1732 if (I != OpenMPLocThreadIDMap.end()) { 1733 ThreadID = I->second.ThreadID; 1734 if (ThreadID != nullptr) 1735 return ThreadID; 1736 } 1737 // If exceptions are enabled, do not use parameter to avoid possible crash. 1738 if (auto *OMPRegionInfo = 1739 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1740 if (OMPRegionInfo->getThreadIDVariable()) { 1741 // Check if this an outlined function with thread id passed as argument. 1742 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1743 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1744 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1745 !CGF.getLangOpts().CXXExceptions || 1746 CGF.Builder.GetInsertBlock() == TopBlock || 1747 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1748 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1749 TopBlock || 1750 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1751 CGF.Builder.GetInsertBlock()) { 1752 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1753 // If value loaded in entry block, cache it and use it everywhere in 1754 // function. 
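        // (Only a load emitted in the entry block is guaranteed to dominate
        // all later uses in the function, so caching is sound only then.)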
1755 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1756 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1757 Elem.second.ThreadID = ThreadID; 1758 } 1759 return ThreadID; 1760 } 1761 } 1762 } 1763 1764 // This is not an outlined function region - need to call kmp_int32 1765 // __kmpc_global_thread_num(ident_t *loc). 1766 // Generate thread id value and cache this value for use across the 1767 // function. 1768 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1769 if (!Elem.second.ServiceInsertPt) 1770 setLocThreadIdInsertPt(CGF); 1771 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1772 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1773 llvm::CallInst *Call = CGF.Builder.CreateCall( 1774 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1775 emitUpdateLocation(CGF, Loc)); 1776 Call->setCallingConv(CGF.getRuntimeCC()); 1777 Elem.second.ThreadID = Call; 1778 return Call; 1779 } 1780 1781 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1782 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1783 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1784 clearLocThreadIdInsertPt(CGF); 1785 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1786 } 1787 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1788 for (const auto *D : FunctionUDRMap[CGF.CurFn]) 1789 UDRMap.erase(D); 1790 FunctionUDRMap.erase(CGF.CurFn); 1791 } 1792 auto I = FunctionUDMMap.find(CGF.CurFn); 1793 if (I != FunctionUDMMap.end()) { 1794 for (const auto *D : I->second) 1795 UDMMap.erase(D); 1796 FunctionUDMMap.erase(I); 1797 } 1798 LastprivateConditionalToTypes.erase(CGF.CurFn); 1799 } 1800 1801 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1802 return IdentTy->getPointerTo(); 1803 } 1804 1805 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1806 if (!Kmpc_MicroTy) { 1807 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 1808 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1809 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1810 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1811 } 1812 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1813 } 1814 1815 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1816 llvm::FunctionCallee RTLFn = nullptr; 1817 switch (static_cast<OpenMPRTLFunction>(Function)) { 1818 case OMPRTL__kmpc_fork_call: { 1819 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1820 // microtask, ...); 1821 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1822 getKmpc_MicroPointerTy()}; 1823 auto *FnTy = 1824 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1825 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1826 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1827 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1828 llvm::LLVMContext &Ctx = F->getContext(); 1829 llvm::MDBuilder MDB(Ctx); 1830 // Annotate the callback behavior of the __kmpc_fork_call: 1831 // - The callback callee is argument number 2 (microtask). 1832 // - The first two arguments of the callback callee are unknown (-1). 1833 // - All variadic arguments to the __kmpc_fork_call are passed to the 1834 // callback callee.
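      // A sketch of the resulting IR (the metadata ids are illustrative):
      //
      //   declare !callback !1 void @__kmpc_fork_call(%struct.ident_t*, i32,
      //                                               void (i32*, i32*, ...)*, ...)
      //   !1 = !{!2}
      //   !2 = !{i64 2, i64 -1, i64 -1, i1 true}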
1835 F->addMetadata( 1836 llvm::LLVMContext::MD_callback, 1837 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1838 2, {-1, -1}, 1839 /* VarArgsArePassed */ true)})); 1840 } 1841 } 1842 break; 1843 } 1844 case OMPRTL__kmpc_global_thread_num: { 1845 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1846 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1847 auto *FnTy = 1848 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1849 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1850 break; 1851 } 1852 case OMPRTL__kmpc_threadprivate_cached: { 1853 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1854 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1855 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1856 CGM.VoidPtrTy, CGM.SizeTy, 1857 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1858 auto *FnTy = 1859 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1860 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1861 break; 1862 } 1863 case OMPRTL__kmpc_critical: { 1864 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1865 // kmp_critical_name *crit); 1866 llvm::Type *TypeParams[] = { 1867 getIdentTyPointerTy(), CGM.Int32Ty, 1868 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1869 auto *FnTy = 1870 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1871 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1872 break; 1873 } 1874 case OMPRTL__kmpc_critical_with_hint: { 1875 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1876 // kmp_critical_name *crit, uintptr_t hint); 1877 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1878 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1879 CGM.IntPtrTy}; 1880 auto *FnTy = 1881 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1882 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint"); 1883 break; 1884 } 1885 case OMPRTL__kmpc_threadprivate_register: { 1886 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1887 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1888 // typedef void *(*kmpc_ctor)(void *); 1889 auto *KmpcCtorTy = 1890 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1891 /*isVarArg*/ false)->getPointerTo(); 1892 // typedef void *(*kmpc_cctor)(void *, void *); 1893 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1894 auto *KmpcCopyCtorTy = 1895 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1896 /*isVarArg*/ false) 1897 ->getPointerTo(); 1898 // typedef void (*kmpc_dtor)(void *); 1899 auto *KmpcDtorTy = 1900 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1901 ->getPointerTo(); 1902 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1903 KmpcCopyCtorTy, KmpcDtorTy}; 1904 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1905 /*isVarArg*/ false); 1906 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1907 break; 1908 } 1909 case OMPRTL__kmpc_end_critical: { 1910 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1911 // kmp_critical_name *crit); 1912 llvm::Type *TypeParams[] = { 1913 getIdentTyPointerTy(), CGM.Int32Ty, 1914 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1915 auto *FnTy = 1916 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1917 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 
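    // At a use site, a 'critical' construct is bracketed by the two entry
    // points above (a sketch; the lock variable name is illustrative):
    //
    //   __kmpc_critical(&loc, tid, &.gomp_critical_user_.var);
    //   <body>
    //   __kmpc_end_critical(&loc, tid, &.gomp_critical_user_.var);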
1918 break; 1919 } 1920 case OMPRTL__kmpc_cancel_barrier: { 1921 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1922 // global_tid); 1923 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1924 auto *FnTy = 1925 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1926 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1927 break; 1928 } 1929 case OMPRTL__kmpc_barrier: { 1930 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1931 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1932 auto *FnTy = 1933 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1934 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1935 break; 1936 } 1937 case OMPRTL__kmpc_for_static_fini: { 1938 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1939 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1940 auto *FnTy = 1941 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1942 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1943 break; 1944 } 1945 case OMPRTL__kmpc_push_num_threads: { 1946 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1947 // kmp_int32 num_threads) 1948 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1949 CGM.Int32Ty}; 1950 auto *FnTy = 1951 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1952 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1953 break; 1954 } 1955 case OMPRTL__kmpc_serialized_parallel: { 1956 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1957 // global_tid); 1958 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1959 auto *FnTy = 1960 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1961 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1962 break; 1963 } 1964 case OMPRTL__kmpc_end_serialized_parallel: { 1965 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1966 // global_tid); 1967 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1968 auto *FnTy = 1969 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1970 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1971 break; 1972 } 1973 case OMPRTL__kmpc_flush: { 1974 // Build void __kmpc_flush(ident_t *loc); 1975 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1976 auto *FnTy = 1977 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1978 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1979 break; 1980 } 1981 case OMPRTL__kmpc_master: { 1982 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1983 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1984 auto *FnTy = 1985 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1986 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1987 break; 1988 } 1989 case OMPRTL__kmpc_end_master: { 1990 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1991 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1992 auto *FnTy = 1993 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1994 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1995 break; 1996 } 1997 case OMPRTL__kmpc_omp_taskyield: { 1998 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1999 // int end_part); 
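    // A '#pragma omp taskyield' lowers to a single call (sketch):
    //
    //   __kmpc_omp_taskyield(&loc, gtid, /*end_part=*/0);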
2000 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2001 auto *FnTy = 2002 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2003 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 2004 break; 2005 } 2006 case OMPRTL__kmpc_single: { 2007 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 2008 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2009 auto *FnTy = 2010 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2011 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 2012 break; 2013 } 2014 case OMPRTL__kmpc_end_single: { 2015 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 2016 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2017 auto *FnTy = 2018 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2019 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 2020 break; 2021 } 2022 case OMPRTL__kmpc_omp_task_alloc: { 2023 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2024 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2025 // kmp_routine_entry_t *task_entry); 2026 assert(KmpRoutineEntryPtrTy != nullptr && 2027 "Type kmp_routine_entry_t must be created."); 2028 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2029 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2030 // Return void * and then cast to particular kmp_task_t type. 2031 auto *FnTy = 2032 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2033 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2034 break; 2035 } 2036 case OMPRTL__kmpc_omp_target_task_alloc: { 2037 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2038 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2039 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2040 assert(KmpRoutineEntryPtrTy != nullptr && 2041 "Type kmp_routine_entry_t must be created."); 2042 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2043 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2044 CGM.Int64Ty}; 2045 // Return void * and then cast to particular kmp_task_t type. 
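    // A use site looks like (a sketch; the variable names are illustrative):
    //
    //   kmp_task_t *NewTask = (kmp_task_t *)__kmpc_omp_target_task_alloc(
    //       &loc, gtid, flags, sizeof_kmp_task_t, sizeof_shareds, &task_entry,
    //       device_id);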
2046 auto *FnTy = 2047 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2048 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2049 break; 2050 } 2051 case OMPRTL__kmpc_omp_task: { 2052 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2053 // *new_task); 2054 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2055 CGM.VoidPtrTy}; 2056 auto *FnTy = 2057 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2058 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2059 break; 2060 } 2061 case OMPRTL__kmpc_copyprivate: { 2062 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2063 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2064 // kmp_int32 didit); 2065 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2066 auto *CpyFnTy = 2067 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2068 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2069 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2070 CGM.Int32Ty}; 2071 auto *FnTy = 2072 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2073 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2074 break; 2075 } 2076 case OMPRTL__kmpc_reduce: { 2077 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2078 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2079 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2080 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2081 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2082 /*isVarArg=*/false); 2083 llvm::Type *TypeParams[] = { 2084 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2085 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2086 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2087 auto *FnTy = 2088 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2089 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2090 break; 2091 } 2092 case OMPRTL__kmpc_reduce_nowait: { 2093 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2094 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2095 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2096 // *lck); 2097 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2098 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2099 /*isVarArg=*/false); 2100 llvm::Type *TypeParams[] = { 2101 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2102 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2103 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2104 auto *FnTy = 2105 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2106 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2107 break; 2108 } 2109 case OMPRTL__kmpc_end_reduce: { 2110 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2111 // kmp_critical_name *lck); 2112 llvm::Type *TypeParams[] = { 2113 getIdentTyPointerTy(), CGM.Int32Ty, 2114 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2115 auto *FnTy = 2116 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2117 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2118 break; 2119 } 2120 case OMPRTL__kmpc_end_reduce_nowait: { 2121 // Build __kmpc_end_reduce_nowait(ident_t *loc, 
kmp_int32 global_tid, 2122 // kmp_critical_name *lck); 2123 llvm::Type *TypeParams[] = { 2124 getIdentTyPointerTy(), CGM.Int32Ty, 2125 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2126 auto *FnTy = 2127 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2128 RTLFn = 2129 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2130 break; 2131 } 2132 case OMPRTL__kmpc_omp_task_begin_if0: { 2133 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 2134 // kmp_task_t *new_task); 2135 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2136 CGM.VoidPtrTy}; 2137 auto *FnTy = 2138 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2139 RTLFn = 2140 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2141 break; 2142 } 2143 case OMPRTL__kmpc_omp_task_complete_if0: { 2144 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 2145 // kmp_task_t *new_task); 2146 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2147 CGM.VoidPtrTy}; 2148 auto *FnTy = 2149 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2150 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2151 /*Name=*/"__kmpc_omp_task_complete_if0"); 2152 break; 2153 } 2154 case OMPRTL__kmpc_ordered: { 2155 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2156 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2157 auto *FnTy = 2158 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2159 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2160 break; 2161 } 2162 case OMPRTL__kmpc_end_ordered: { 2163 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2164 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2165 auto *FnTy = 2166 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2167 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2168 break; 2169 } 2170 case OMPRTL__kmpc_omp_taskwait: { 2171 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2172 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2173 auto *FnTy = 2174 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2175 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2176 break; 2177 } 2178 case OMPRTL__kmpc_taskgroup: { 2179 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 2180 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2181 auto *FnTy = 2182 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2183 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2184 break; 2185 } 2186 case OMPRTL__kmpc_end_taskgroup: { 2187 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2188 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2189 auto *FnTy = 2190 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2191 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2192 break; 2193 } 2194 case OMPRTL__kmpc_push_proc_bind: { 2195 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2196 // int proc_bind) 2197 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2198 auto *FnTy = 2199 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2200 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2201 break; 2202 } 2203 case OMPRTL__kmpc_omp_task_with_deps: { 2204 // Build kmp_int32 
__kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2205 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2206 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2207 llvm::Type *TypeParams[] = { 2208 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2209 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2210 auto *FnTy = 2211 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2212 RTLFn = 2213 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2214 break; 2215 } 2216 case OMPRTL__kmpc_omp_wait_deps: { 2217 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2218 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2219 // kmp_depend_info_t *noalias_dep_list); 2220 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2221 CGM.Int32Ty, CGM.VoidPtrTy, 2222 CGM.Int32Ty, CGM.VoidPtrTy}; 2223 auto *FnTy = 2224 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2225 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2226 break; 2227 } 2228 case OMPRTL__kmpc_cancellationpoint: { 2229 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2230 // global_tid, kmp_int32 cncl_kind) 2231 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2232 auto *FnTy = 2233 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2234 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2235 break; 2236 } 2237 case OMPRTL__kmpc_cancel: { 2238 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2239 // kmp_int32 cncl_kind) 2240 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2241 auto *FnTy = 2242 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2243 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2244 break; 2245 } 2246 case OMPRTL__kmpc_push_num_teams: { 2247 // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 2248 // kmp_int32 num_teams, kmp_int32 num_threads) 2249 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2250 CGM.Int32Ty}; 2251 auto *FnTy = 2252 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2253 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2254 break; 2255 } 2256 case OMPRTL__kmpc_fork_teams: { 2257 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2258 // microtask, ...); 2259 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2260 getKmpc_MicroPointerTy()}; 2261 auto *FnTy = 2262 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2263 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2264 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2265 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2266 llvm::LLVMContext &Ctx = F->getContext(); 2267 llvm::MDBuilder MDB(Ctx); 2268 // Annotate the callback behavior of the __kmpc_fork_teams: 2269 // - The callback callee is argument number 2 (microtask). 2270 // - The first two arguments of the callback callee are unknown (-1). 2271 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2272 // callback callee.
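      // For example, a host-side '#pragma omp teams' region is launched as
      // (sketch):
      //
      //   __kmpc_fork_teams(&loc, argc, (kmpc_micro)teams_outlined_fn,
      //                     <captured args...>);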
2273 F->addMetadata( 2274 llvm::LLVMContext::MD_callback, 2275 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2276 2, {-1, -1}, 2277 /* VarArgsArePassed */ true)})); 2278 } 2279 } 2280 break; 2281 } 2282 case OMPRTL__kmpc_taskloop: { 2283 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2284 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2285 // sched, kmp_uint64 grainsize, void *task_dup); 2286 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2287 CGM.IntTy, 2288 CGM.VoidPtrTy, 2289 CGM.IntTy, 2290 CGM.Int64Ty->getPointerTo(), 2291 CGM.Int64Ty->getPointerTo(), 2292 CGM.Int64Ty, 2293 CGM.IntTy, 2294 CGM.IntTy, 2295 CGM.Int64Ty, 2296 CGM.VoidPtrTy}; 2297 auto *FnTy = 2298 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2299 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2300 break; 2301 } 2302 case OMPRTL__kmpc_doacross_init: { 2303 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2304 // num_dims, struct kmp_dim *dims); 2305 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2306 CGM.Int32Ty, 2307 CGM.Int32Ty, 2308 CGM.VoidPtrTy}; 2309 auto *FnTy = 2310 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2311 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2312 break; 2313 } 2314 case OMPRTL__kmpc_doacross_fini: { 2315 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2316 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2317 auto *FnTy = 2318 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2319 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2320 break; 2321 } 2322 case OMPRTL__kmpc_doacross_post: { 2323 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2324 // *vec); 2325 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2326 CGM.Int64Ty->getPointerTo()}; 2327 auto *FnTy = 2328 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2329 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2330 break; 2331 } 2332 case OMPRTL__kmpc_doacross_wait: { 2333 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2334 // *vec); 2335 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2336 CGM.Int64Ty->getPointerTo()}; 2337 auto *FnTy = 2338 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2339 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2340 break; 2341 } 2342 case OMPRTL__kmpc_taskred_init: { 2343 // Build void *__kmpc_taskred_init(int gtid, int num_data, void *data); 2344 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2345 auto *FnTy = 2346 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2347 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskred_init"); 2348 break; 2349 } 2350 case OMPRTL__kmpc_task_reduction_get_th_data: { 2351 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2352 // *d); 2353 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2354 auto *FnTy = 2355 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2356 RTLFn = CGM.CreateRuntimeFunction( 2357 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2358 break; 2359 } 2360 case OMPRTL__kmpc_taskred_modifier_init: { 2361 // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int 2362 // is_ws, int 
num_data, void *data); 2363 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy, 2364 CGM.IntTy, CGM.VoidPtrTy}; 2365 auto *FnTy = 2366 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2367 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2368 /*Name=*/"__kmpc_taskred_modifier_init"); 2369 break; 2370 } 2371 case OMPRTL__kmpc_task_reduction_modifier_fini: { 2372 // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, 2373 // int is_ws); 2374 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy}; 2375 auto *FnTy = 2376 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2377 RTLFn = CGM.CreateRuntimeFunction( 2378 FnTy, 2379 /*Name=*/"__kmpc_task_reduction_modifier_fini"); 2380 break; 2381 } 2382 case OMPRTL__kmpc_alloc: { 2383 // Build void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2384 // al); omp_allocator_handle_t type is void *. 2385 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2386 auto *FnTy = 2387 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2388 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2389 break; 2390 } 2391 case OMPRTL__kmpc_free: { 2392 // Build void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2393 // al); omp_allocator_handle_t type is void *. 2394 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2395 auto *FnTy = 2396 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2397 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2398 break; 2399 } 2400 case OMPRTL__kmpc_init_allocator: { 2401 // Build omp_allocator_handle_t __kmpc_init_allocator(int gtid, 2402 // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]); 2403 // omp_allocator_handle_t type is void*, omp_memspace_handle_t type is 2404 // void*. 2405 auto *FnTy = llvm::FunctionType::get( 2406 CGM.VoidPtrTy, {CGM.IntTy, CGM.VoidPtrTy, CGM.IntTy, CGM.VoidPtrTy}, 2407 /*isVarArg=*/false); 2408 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_init_allocator"); 2409 break; 2410 } 2411 case OMPRTL__kmpc_destroy_allocator: { 2412 // Build void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al); 2413 // omp_allocator_handle_t type is void*. 
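    // Together with the entries above, the allocator lifecycle is (sketch):
    //
    //   omp_allocator_handle_t al = __kmpc_init_allocator(gtid, ms, n, traits);
    //   void *p = __kmpc_alloc(gtid, size, al);
    //   __kmpc_free(gtid, p, al);
    //   __kmpc_destroy_allocator(gtid, al);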
2414 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, {CGM.IntTy, CGM.VoidPtrTy}, 2415 /*isVarArg=*/false); 2416 RTLFn = 2417 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_destroy_allocator"); 2418 break; 2419 } 2420 case OMPRTL__kmpc_push_target_tripcount: { 2421 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2422 // size); 2423 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2424 llvm::FunctionType *FnTy = 2425 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2426 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2427 break; 2428 } 2429 case OMPRTL__tgt_target: { 2430 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2431 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2432 // *arg_types); 2433 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2434 CGM.VoidPtrTy, 2435 CGM.Int32Ty, 2436 CGM.VoidPtrPtrTy, 2437 CGM.VoidPtrPtrTy, 2438 CGM.Int64Ty->getPointerTo(), 2439 CGM.Int64Ty->getPointerTo()}; 2440 auto *FnTy = 2441 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2442 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2443 break; 2444 } 2445 case OMPRTL__tgt_target_nowait: { 2446 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2447 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2448 // int64_t *arg_types); 2449 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2450 CGM.VoidPtrTy, 2451 CGM.Int32Ty, 2452 CGM.VoidPtrPtrTy, 2453 CGM.VoidPtrPtrTy, 2454 CGM.Int64Ty->getPointerTo(), 2455 CGM.Int64Ty->getPointerTo()}; 2456 auto *FnTy = 2457 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2458 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2459 break; 2460 } 2461 case OMPRTL__tgt_target_teams: { 2462 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2463 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2464 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2465 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2466 CGM.VoidPtrTy, 2467 CGM.Int32Ty, 2468 CGM.VoidPtrPtrTy, 2469 CGM.VoidPtrPtrTy, 2470 CGM.Int64Ty->getPointerTo(), 2471 CGM.Int64Ty->getPointerTo(), 2472 CGM.Int32Ty, 2473 CGM.Int32Ty}; 2474 auto *FnTy = 2475 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2476 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2477 break; 2478 } 2479 case OMPRTL__tgt_target_teams_nowait: { 2480 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2481 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2482 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2483 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2484 CGM.VoidPtrTy, 2485 CGM.Int32Ty, 2486 CGM.VoidPtrPtrTy, 2487 CGM.VoidPtrPtrTy, 2488 CGM.Int64Ty->getPointerTo(), 2489 CGM.Int64Ty->getPointerTo(), 2490 CGM.Int32Ty, 2491 CGM.Int32Ty}; 2492 auto *FnTy = 2493 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2494 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2495 break; 2496 } 2497 case OMPRTL__tgt_register_requires: { 2498 // Build void __tgt_register_requires(int64_t flags); 2499 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2500 auto *FnTy = 2501 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2502 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2503 break; 2504 } 2505 case OMPRTL__tgt_target_data_begin: { 2506 // 
Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2507 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2508 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2509 CGM.Int32Ty, 2510 CGM.VoidPtrPtrTy, 2511 CGM.VoidPtrPtrTy, 2512 CGM.Int64Ty->getPointerTo(), 2513 CGM.Int64Ty->getPointerTo()}; 2514 auto *FnTy = 2515 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2516 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2517 break; 2518 } 2519 case OMPRTL__tgt_target_data_begin_nowait: { 2520 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2521 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2522 // *arg_types); 2523 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2524 CGM.Int32Ty, 2525 CGM.VoidPtrPtrTy, 2526 CGM.VoidPtrPtrTy, 2527 CGM.Int64Ty->getPointerTo(), 2528 CGM.Int64Ty->getPointerTo()}; 2529 auto *FnTy = 2530 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2531 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2532 break; 2533 } 2534 case OMPRTL__tgt_target_data_end: { 2535 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2536 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2537 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2538 CGM.Int32Ty, 2539 CGM.VoidPtrPtrTy, 2540 CGM.VoidPtrPtrTy, 2541 CGM.Int64Ty->getPointerTo(), 2542 CGM.Int64Ty->getPointerTo()}; 2543 auto *FnTy = 2544 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2545 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2546 break; 2547 } 2548 case OMPRTL__tgt_target_data_end_nowait: { 2549 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2550 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2551 // *arg_types); 2552 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2553 CGM.Int32Ty, 2554 CGM.VoidPtrPtrTy, 2555 CGM.VoidPtrPtrTy, 2556 CGM.Int64Ty->getPointerTo(), 2557 CGM.Int64Ty->getPointerTo()}; 2558 auto *FnTy = 2559 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2560 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2561 break; 2562 } 2563 case OMPRTL__tgt_target_data_update: { 2564 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2565 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2566 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2567 CGM.Int32Ty, 2568 CGM.VoidPtrPtrTy, 2569 CGM.VoidPtrPtrTy, 2570 CGM.Int64Ty->getPointerTo(), 2571 CGM.Int64Ty->getPointerTo()}; 2572 auto *FnTy = 2573 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2574 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2575 break; 2576 } 2577 case OMPRTL__tgt_target_data_update_nowait: { 2578 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2579 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2580 // *arg_types); 2581 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2582 CGM.Int32Ty, 2583 CGM.VoidPtrPtrTy, 2584 CGM.VoidPtrPtrTy, 2585 CGM.Int64Ty->getPointerTo(), 2586 CGM.Int64Ty->getPointerTo()}; 2587 auto *FnTy = 2588 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2589 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2590 break; 2591 } 2592 case OMPRTL__tgt_mapper_num_components: { 2593 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2594 
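    // A mapper function generated for '#pragma omp declare mapper' uses this
    // pair of entries roughly as (sketch):
    //
    //   int64_t Size = __tgt_mapper_num_components(rt_mapper_handle);
    //   __tgt_push_mapper_component(rt_mapper_handle, base, begin, bytes, type);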
llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2595 auto *FnTy = 2596 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2597 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2598 break; 2599 } 2600 case OMPRTL__tgt_push_mapper_component: { 2601 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2602 // *base, void *begin, int64_t size, int64_t type); 2603 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2604 CGM.Int64Ty, CGM.Int64Ty}; 2605 auto *FnTy = 2606 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2607 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2608 break; 2609 } 2610 case OMPRTL__kmpc_task_allow_completion_event: { 2611 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 2612 // int gtid, kmp_task_t *task); 2613 auto *FnTy = llvm::FunctionType::get( 2614 CGM.VoidPtrTy, {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy}, 2615 /*isVarArg=*/false); 2616 RTLFn = 2617 CGM.CreateRuntimeFunction(FnTy, "__kmpc_task_allow_completion_event"); 2618 break; 2619 } 2620 } 2621 assert(RTLFn && "Unable to find OpenMP runtime function"); 2622 return RTLFn; 2623 } 2624 2625 llvm::FunctionCallee 2626 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2627 assert((IVSize == 32 || IVSize == 64) && 2628 "IV size is not compatible with the omp runtime"); 2629 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2630 : "__kmpc_for_static_init_4u") 2631 : (IVSigned ? "__kmpc_for_static_init_8" 2632 : "__kmpc_for_static_init_8u"); 2633 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2634 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2635 llvm::Type *TypeParams[] = { 2636 getIdentTyPointerTy(), // loc 2637 CGM.Int32Ty, // tid 2638 CGM.Int32Ty, // schedtype 2639 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2640 PtrTy, // p_lower 2641 PtrTy, // p_upper 2642 PtrTy, // p_stride 2643 ITy, // incr 2644 ITy // chunk 2645 }; 2646 auto *FnTy = 2647 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2648 return CGM.CreateRuntimeFunction(FnTy, Name); 2649 } 2650 2651 llvm::FunctionCallee 2652 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2653 assert((IVSize == 32 || IVSize == 64) && 2654 "IV size is not compatible with the omp runtime"); 2655 StringRef Name = 2656 IVSize == 32 2657 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2658 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2659 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2660 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2661 CGM.Int32Ty, // tid 2662 CGM.Int32Ty, // schedtype 2663 ITy, // lower 2664 ITy, // upper 2665 ITy, // stride 2666 ITy // chunk 2667 }; 2668 auto *FnTy = 2669 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2670 return CGM.CreateRuntimeFunction(FnTy, Name); 2671 } 2672 2673 llvm::FunctionCallee 2674 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2675 assert((IVSize == 32 || IVSize == 64) && 2676 "IV size is not compatible with the omp runtime"); 2677 StringRef Name = 2678 IVSize == 32 2679 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2680 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2681 llvm::Type *TypeParams[] = { 2682 getIdentTyPointerTy(), // loc 2683 CGM.Int32Ty, // tid 2684 }; 2685 auto *FnTy = 2686 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2687 return CGM.CreateRuntimeFunction(FnTy, Name); 2688 } 2689 2690 llvm::FunctionCallee 2691 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2692 assert((IVSize == 32 || IVSize == 64) && 2693 "IV size is not compatible with the omp runtime"); 2694 StringRef Name = 2695 IVSize == 32 2696 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2697 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2698 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2699 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2700 llvm::Type *TypeParams[] = { 2701 getIdentTyPointerTy(), // loc 2702 CGM.Int32Ty, // tid 2703 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2704 PtrTy, // p_lower 2705 PtrTy, // p_upper 2706 PtrTy // p_stride 2707 }; 2708 auto *FnTy = 2709 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2710 return CGM.CreateRuntimeFunction(FnTy, Name); 2711 } 2712 2713 /// Obtain information that uniquely identifies a target entry. This 2714 /// consists of the file and device IDs as well as line number associated with 2715 /// the relevant entry source location. 2716 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2717 unsigned &DeviceID, unsigned &FileID, 2718 unsigned &LineNum) { 2719 SourceManager &SM = C.getSourceManager(); 2720 2721 // The loc should be always valid and have a file ID (the user cannot use 2722 // #pragma directives in macros) 2723 2724 assert(Loc.isValid() && "Source location is expected to be always valid."); 2725 2726 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2727 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2728 2729 llvm::sys::fs::UniqueID ID; 2730 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2731 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2732 << PLoc.getFilename() << EC.message(); 2733 2734 DeviceID = ID.getDevice(); 2735 FileID = ID.getFile(); 2736 LineNum = PLoc.getLine(); 2737 } 2738 2739 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2740 if (CGM.getLangOpts().OpenMPSimd) 2741 return Address::invalid(); 2742 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2743 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2744 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2745 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2746 HasRequiresUnifiedSharedMemory))) { 2747 SmallString<64> PtrName; 2748 { 2749 llvm::raw_svector_ostream OS(PtrName); 2750 OS << CGM.getMangledName(GlobalDecl(VD)); 2751 if (!VD->isExternallyVisible()) { 2752 unsigned DeviceID, FileID, Line; 2753 getTargetEntryUniqueInfo(CGM.getContext(), 2754 VD->getCanonicalDecl()->getBeginLoc(), 2755 DeviceID, FileID, Line); 2756 OS << llvm::format("_%x", FileID); 2757 } 2758 OS << "_decl_tgt_ref_ptr"; 2759 } 2760 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2761 if (!Ptr) { 2762 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2763 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2764 PtrName); 2765 2766 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2767 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2768 2769 if (!CGM.getLangOpts().OpenMPIsDevice) 2770 
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
      VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
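  // Illustrative sketch only (the variable name is hypothetical): for
  //   int X;
  //   #pragma omp threadprivate(X)
  // this produces, at initialization time, roughly
  //   __kmpc_global_thread_num(&loc);
  //   __kmpc_threadprivate_register(&loc, &X, ctor, /*cctor=*/NULL, dtor);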
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL: the parameter is reserved by the runtime, which currently
    // requires it to always be NULL and fires an assertion otherwise.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
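  // For illustration (the hex values are hypothetical): a variable 'Foo'
  // declared at line 42 yields a prefix like
  //   __omp_offloading__1234_5678_Foo_l42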
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for an unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for an unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid"). Otherwise, if we're in a regular serial code
// region, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary
// and return the address of that temp.
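// As a rough sketch of the serial-path case (IR value names are illustrative):
//   %gtid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @loc)
//   %.threadid_temp. = alloca i32
//   store i32 %gtid, i32* %.threadid_temp.
// and the address of %.threadid_temp. is returned.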
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
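/// As an illustrative pairing (taken from the emitters below): a 'master'
/// region uses __kmpc_master as a conditional enter call and __kmpc_end_master
/// as the exit call, while a 'critical' region uses the unconditional
/// __kmpc_critical / __kmpc_end_critical pair.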
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies private values from the single region to
    // all other threads in the corresponding parallel region.
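    // Shape of the generated helper, as a rough sketch (the type names are
    // illustrative):
    //   void .omp.copyprivate.copy_func(void *LHS, void *RHS) {
    //     *(T0 *)((void **)LHS)[0] = *(T0 *)((void **)RHS)[0];
    //     ...
    //   }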
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
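  // For example (per the rule above): with -fopenmp-version=50,
  //   #pragma omp for schedule(dynamic)
  // behaves as if schedule(nonmonotonic: dynamic) had been written, so the
  // nonmonotonic modifier bit is OR-ed into the schedule value below.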
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause, use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause, use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, isOpenMPLoopDirective(DKind)
                                       ? OMP_IDENT_WORK_LOOP
                                       : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized;
  // it only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
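  // Note: the iteration order below mirrors the nested
  // DeviceID -> FileID -> ParentName -> LineNum indexing used when the
  // entries were registered above.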
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create a constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create a function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        //   identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
4179 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4180 GetMDInt(FileID), GetMDString(ParentName),
4181 GetMDInt(Line), GetMDInt(E.getOrder())};
4182 
4183 SourceLocation Loc;
4184 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
4185 E = CGM.getContext().getSourceManager().fileinfo_end();
4186 I != E; ++I) {
4187 if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
4188 I->getFirst()->getUniqueID().getFile() == FileID) {
4189 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
4190 I->getFirst(), Line, 1);
4191 break;
4192 }
4193 }
4194 // Save this entry in the right position of the ordered entries array.
4195 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
4196 ParentFunctions[E.getOrder()] = ParentName;
4197 
4198 // Add metadata to the named metadata node.
4199 MD->addOperand(llvm::MDNode::get(C, Ops));
4200 };
4201 
4202 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
4203 TargetRegionMetadataEmitter);
4204 
4205 // Create function that emits metadata for each device global variable entry.
4206 auto &&DeviceGlobalVarMetadataEmitter =
4207 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4208 MD](StringRef MangledName,
4209 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
4210 &E) {
4211 // Generate metadata for global variables. Each entry of this metadata
4212 // contains:
4213 // - Entry 0 -> Kind of this type of metadata (1).
4214 // - Entry 1 -> Mangled name of the variable.
4215 // - Entry 2 -> Declare target kind.
4216 // - Entry 3 -> Order the entry was created.
4217 // The first element of the metadata node is the kind.
4218 llvm::Metadata *Ops[] = {
4219 GetMDInt(E.getKind()), GetMDString(MangledName),
4220 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4221 
4222 // Save this entry in the right position of the ordered entries array.
4223 OrderedEntries[E.getOrder()] =
4224 std::make_tuple(&E, SourceLocation(), MangledName);
4225 
4226 // Add metadata to the named metadata node.
4227 MD->addOperand(llvm::MDNode::get(C, Ops));
4228 };
4229 
4230 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
4231 DeviceGlobalVarMetadataEmitter);
4232 
4233 for (const auto &E : OrderedEntries) {
4234 assert(std::get<0>(E) && "All ordered entries must exist!");
4235 if (const auto *CE =
4236 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4237 std::get<0>(E))) {
4238 if (!CE->getID() || !CE->getAddress()) {
4239 // Do not blame the entry if the parent function is not emitted.
4240 StringRef FnName = ParentFunctions[CE->getOrder()];
4241 if (!CGM.GetGlobalValue(FnName))
4242 continue;
4243 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4244 DiagnosticsEngine::Error,
4245 "Offloading entry for target region in %0 is incorrect: either the "
4246 "address or the ID is invalid.");
4247 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
4248 continue;
4249 }
4250 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4251 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4252 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
4253 OffloadEntryInfoDeviceGlobalVar>(
4254 std::get<0>(E))) {
4255 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
4256 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4257 CE->getFlags());
4258 switch (Flags) {
4259 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
4260 if (CGM.getLangOpts().OpenMPIsDevice &&
4261 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4262 continue;
4263 if (!CE->getAddress()) {
4264 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4265 DiagnosticsEngine::Error, "Offloading entry for declare target "
4266 "variable %0 is incorrect: the "
4267 "address is invalid.");
4268 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
4269 continue;
4270 }
4271 // The variable has no definition - no need to add the entry.
4272 if (CE->getVarSize().isZero())
4273 continue;
4274 break;
4275 }
4276 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
4277 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4278 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4279 "Declare target link address is set.");
4280 if (CGM.getLangOpts().OpenMPIsDevice)
4281 continue;
4282 if (!CE->getAddress()) {
4283 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4284 DiagnosticsEngine::Error,
4285 "Offloading entry for declare target variable is incorrect: the "
4286 "address is invalid.");
4287 CGM.getDiags().Report(DiagID);
4288 continue;
4289 }
4290 break;
4291 }
4292 createOffloadEntry(CE->getAddress(), CE->getAddress(),
4293 CE->getVarSize().getQuantity(), Flags,
4294 CE->getLinkage());
4295 } else {
4296 llvm_unreachable("Unsupported entry kind.");
4297 }
4298 }
4299 }
4300 
4301 /// Loads all the offload entries information from the host IR
4302 /// metadata.
4303 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
4304 // If we are in target mode, load the metadata from the host IR. This code has
4305 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
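// For reference, the named metadata consumed here might look like this in
// the host IR (a sketch; the device/file IDs come from the unique ID of the
// source file, so all values below are illustrative):
//
//   !omp_offload.info = !{!0, !1}
//   !0 = !{i32 0, i32 42, i32 7, !"_Z3foov", i32 12, i32 0} ; target region
//   !1 = !{i32 1, !"gvar", i32 0, i32 1} ; declare target global variable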
4306 
4307 if (!CGM.getLangOpts().OpenMPIsDevice)
4308 return;
4309 
4310 if (CGM.getLangOpts().OMPHostIRFile.empty())
4311 return;
4312 
4313 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4314 if (auto EC = Buf.getError()) {
4315 CGM.getDiags().Report(diag::err_cannot_open_file)
4316 << CGM.getLangOpts().OMPHostIRFile << EC.message();
4317 return;
4318 }
4319 
4320 llvm::LLVMContext C;
4321 auto ME = expectedToErrorOrAndEmitErrors(
4322 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4323 
4324 if (auto EC = ME.getError()) {
4325 unsigned DiagID = CGM.getDiags().getCustomDiagID(
4326 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4327 CGM.getDiags().Report(DiagID)
4328 << CGM.getLangOpts().OMPHostIRFile << EC.message();
4329 return;
4330 }
4331 
4332 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4333 if (!MD)
4334 return;
4335 
4336 for (llvm::MDNode *MN : MD->operands()) {
4337 auto &&GetMDInt = [MN](unsigned Idx) {
4338 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4339 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4340 };
4341 
4342 auto &&GetMDString = [MN](unsigned Idx) {
4343 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4344 return V->getString();
4345 };
4346 
4347 switch (GetMDInt(0)) {
4348 default:
4349 llvm_unreachable("Unexpected metadata!");
4350 break;
4351 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4352 OffloadingEntryInfoTargetRegion:
4353 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
4354 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4355 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4356 /*Order=*/GetMDInt(5));
4357 break;
4358 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
4359 OffloadingEntryInfoDeviceGlobalVar:
4360 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
4361 /*MangledName=*/GetMDString(1),
4362 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4363 /*Flags=*/GetMDInt(2)),
4364 /*Order=*/GetMDInt(3));
4365 break;
4366 }
4367 }
4368 }
4369 
4370 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
4371 if (!KmpRoutineEntryPtrTy) {
4372 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4373 ASTContext &C = CGM.getContext();
4374 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4375 FunctionProtoType::ExtProtoInfo EPI;
4376 KmpRoutineEntryPtrQTy = C.getPointerType(
4377 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
4378 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4379 }
4380 }
4381 
4382 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
4383 // Make sure the type of the entry is already created. This is the type we
4384 // have to create:
4385 // struct __tgt_offload_entry{
4386 // void *addr; // Pointer to the offload entry info.
4387 // // (function or global)
4388 // char *name; // Name of the function or global.
4389 // size_t size; // Size of the entry info (0 if it is a function).
4390 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4391 // int32_t reserved; // Reserved, to be used by the runtime library.
4392 // }; 4393 if (TgtOffloadEntryQTy.isNull()) { 4394 ASTContext &C = CGM.getContext(); 4395 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4396 RD->startDefinition(); 4397 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4398 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4399 addFieldToRecordDecl(C, RD, C.getSizeType()); 4400 addFieldToRecordDecl( 4401 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4402 addFieldToRecordDecl( 4403 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4404 RD->completeDefinition(); 4405 RD->addAttr(PackedAttr::CreateImplicit(C)); 4406 TgtOffloadEntryQTy = C.getRecordType(RD); 4407 } 4408 return TgtOffloadEntryQTy; 4409 } 4410 4411 namespace { 4412 struct PrivateHelpersTy { 4413 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 4414 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 4415 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 4416 PrivateElemInit(PrivateElemInit) {} 4417 const Expr *OriginalRef = nullptr; 4418 const VarDecl *Original = nullptr; 4419 const VarDecl *PrivateCopy = nullptr; 4420 const VarDecl *PrivateElemInit = nullptr; 4421 }; 4422 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4423 } // anonymous namespace 4424 4425 static RecordDecl * 4426 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4427 if (!Privates.empty()) { 4428 ASTContext &C = CGM.getContext(); 4429 // Build struct .kmp_privates_t. { 4430 // /* private vars */ 4431 // }; 4432 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4433 RD->startDefinition(); 4434 for (const auto &Pair : Privates) { 4435 const VarDecl *VD = Pair.second.Original; 4436 QualType Type = VD->getType().getNonReferenceType(); 4437 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4438 if (VD->hasAttrs()) { 4439 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4440 E(VD->getAttrs().end()); 4441 I != E; ++I) 4442 FD->addAttr(*I); 4443 } 4444 } 4445 RD->completeDefinition(); 4446 return RD; 4447 } 4448 return nullptr; 4449 } 4450 4451 static RecordDecl * 4452 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4453 QualType KmpInt32Ty, 4454 QualType KmpRoutineEntryPointerQTy) { 4455 ASTContext &C = CGM.getContext(); 4456 // Build struct kmp_task_t { 4457 // void * shareds; 4458 // kmp_routine_entry_t routine; 4459 // kmp_int32 part_id; 4460 // kmp_cmplrdata_t data1; 4461 // kmp_cmplrdata_t data2; 4462 // For taskloops additional fields: 4463 // kmp_uint64 lb; 4464 // kmp_uint64 ub; 4465 // kmp_int64 st; 4466 // kmp_int32 liter; 4467 // void * reductions; 4468 // }; 4469 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4470 UD->startDefinition(); 4471 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4472 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4473 UD->completeDefinition(); 4474 QualType KmpCmplrdataTy = C.getRecordType(UD); 4475 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4476 RD->startDefinition(); 4477 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4478 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4479 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4480 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4481 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4482 if (isOpenMPTaskLoopDirective(Kind)) { 4483 QualType KmpUInt64Ty = 4484 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4485 QualType KmpInt64Ty = 4486 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4487 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4488 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4489 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4490 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4491 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4492 } 4493 RD->completeDefinition(); 4494 return RD; 4495 } 4496 4497 static RecordDecl * 4498 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4499 ArrayRef<PrivateDataTy> Privates) { 4500 ASTContext &C = CGM.getContext(); 4501 // Build struct kmp_task_t_with_privates { 4502 // kmp_task_t task_data; 4503 // .kmp_privates_t. privates; 4504 // }; 4505 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4506 RD->startDefinition(); 4507 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4508 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4509 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4510 RD->completeDefinition(); 4511 return RD; 4512 } 4513 4514 /// Emit a proxy function which accepts kmp_task_t as the second 4515 /// argument. 4516 /// \code 4517 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4518 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4519 /// For taskloops: 4520 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4521 /// tt->reductions, tt->shareds); 4522 /// return 0; 4523 /// } 4524 /// \endcode 4525 static llvm::Function * 4526 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4527 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4528 QualType KmpTaskTWithPrivatesPtrQTy, 4529 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4530 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4531 llvm::Value *TaskPrivatesMap) { 4532 ASTContext &C = CGM.getContext(); 4533 FunctionArgList Args; 4534 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4535 ImplicitParamDecl::Other); 4536 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4537 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4538 ImplicitParamDecl::Other); 4539 Args.push_back(&GtidArg); 4540 Args.push_back(&TaskTypeArg); 4541 const auto &TaskEntryFnInfo = 4542 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4543 llvm::FunctionType *TaskEntryTy = 4544 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4545 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4546 auto *TaskEntry = llvm::Function::Create( 4547 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4548 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4549 TaskEntry->setDoesNotRecurse(); 4550 CodeGenFunction CGF(CGM); 4551 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4552 Loc, Loc); 4553 4554 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4555 // tt, 4556 // For taskloops: 4557 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4558 // tt->task_data.shareds); 4559 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4560 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4561 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4562 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4563 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4564 const auto *KmpTaskTWithPrivatesQTyRD = 4565 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4566 LValue Base = 4567 
CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4568 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4569 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4570 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4571 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 4572 4573 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4574 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4575 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4576 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4577 CGF.ConvertTypeForMem(SharedsPtrTy)); 4578 4579 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4580 llvm::Value *PrivatesParam; 4581 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4582 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4583 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4584 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 4585 } else { 4586 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4587 } 4588 4589 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4590 TaskPrivatesMap, 4591 CGF.Builder 4592 .CreatePointerBitCastOrAddrSpaceCast( 4593 TDBase.getAddress(CGF), CGF.VoidPtrTy) 4594 .getPointer()}; 4595 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4596 std::end(CommonArgs)); 4597 if (isOpenMPTaskLoopDirective(Kind)) { 4598 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4599 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4600 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4601 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4602 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4603 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4604 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4605 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4606 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4607 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4608 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4609 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4610 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4611 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4612 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4613 CallArgs.push_back(LBParam); 4614 CallArgs.push_back(UBParam); 4615 CallArgs.push_back(StParam); 4616 CallArgs.push_back(LIParam); 4617 CallArgs.push_back(RParam); 4618 } 4619 CallArgs.push_back(SharedsParam); 4620 4621 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4622 CallArgs); 4623 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4624 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4625 CGF.FinishFunction(); 4626 return TaskEntry; 4627 } 4628 4629 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4630 SourceLocation Loc, 4631 QualType KmpInt32Ty, 4632 QualType KmpTaskTWithPrivatesPtrQTy, 4633 QualType KmpTaskTWithPrivatesQTy) { 4634 ASTContext &C = CGM.getContext(); 4635 FunctionArgList Args; 4636 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4637 ImplicitParamDecl::Other); 4638 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4639 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4640 ImplicitParamDecl::Other); 4641 
Args.push_back(&GtidArg); 4642 Args.push_back(&TaskTypeArg); 4643 const auto &DestructorFnInfo = 4644 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4645 llvm::FunctionType *DestructorFnTy = 4646 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4647 std::string Name = 4648 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4649 auto *DestructorFn = 4650 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4651 Name, &CGM.getModule()); 4652 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4653 DestructorFnInfo); 4654 DestructorFn->setDoesNotRecurse(); 4655 CodeGenFunction CGF(CGM); 4656 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4657 Args, Loc, Loc); 4658 4659 LValue Base = CGF.EmitLoadOfPointerLValue( 4660 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4661 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4662 const auto *KmpTaskTWithPrivatesQTyRD = 4663 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4664 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4665 Base = CGF.EmitLValueForField(Base, *FI); 4666 for (const auto *Field : 4667 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4668 if (QualType::DestructionKind DtorKind = 4669 Field->getType().isDestructedType()) { 4670 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4671 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 4672 } 4673 } 4674 CGF.FinishFunction(); 4675 return DestructorFn; 4676 } 4677 4678 /// Emit a privates mapping function for correct handling of private and 4679 /// firstprivate variables. 4680 /// \code 4681 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> 4682 /// **noalias priv1,..., <tyn> **noalias privn) { 4683 /// *priv1 = &.privates.priv1; 4684 /// ...; 4685 /// *privn = &.privates.privn; 4686 /// } 4687 /// \endcode 4688 static llvm::Value * 4689 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4690 ArrayRef<const Expr *> PrivateVars, 4691 ArrayRef<const Expr *> FirstprivateVars, 4692 ArrayRef<const Expr *> LastprivateVars, 4693 QualType PrivatesQTy, 4694 ArrayRef<PrivateDataTy> Privates) { 4695 ASTContext &C = CGM.getContext(); 4696 FunctionArgList Args; 4697 ImplicitParamDecl TaskPrivatesArg( 4698 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4699 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4700 ImplicitParamDecl::Other); 4701 Args.push_back(&TaskPrivatesArg); 4702 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4703 unsigned Counter = 1; 4704 for (const Expr *E : PrivateVars) { 4705 Args.push_back(ImplicitParamDecl::Create( 4706 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4707 C.getPointerType(C.getPointerType(E->getType())) 4708 .withConst() 4709 .withRestrict(), 4710 ImplicitParamDecl::Other)); 4711 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4712 PrivateVarsPos[VD] = Counter; 4713 ++Counter; 4714 } 4715 for (const Expr *E : FirstprivateVars) { 4716 Args.push_back(ImplicitParamDecl::Create( 4717 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4718 C.getPointerType(C.getPointerType(E->getType())) 4719 .withConst() 4720 .withRestrict(), 4721 ImplicitParamDecl::Other)); 4722 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4723 PrivateVarsPos[VD] = Counter; 4724 ++Counter; 4725 } 4726 for (const Expr *E : LastprivateVars) { 4727 Args.push_back(ImplicitParamDecl::Create( 4728 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4729 
C.getPointerType(C.getPointerType(E->getType())) 4730 .withConst() 4731 .withRestrict(), 4732 ImplicitParamDecl::Other)); 4733 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4734 PrivateVarsPos[VD] = Counter; 4735 ++Counter; 4736 } 4737 const auto &TaskPrivatesMapFnInfo = 4738 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4739 llvm::FunctionType *TaskPrivatesMapTy = 4740 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4741 std::string Name = 4742 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4743 auto *TaskPrivatesMap = llvm::Function::Create( 4744 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4745 &CGM.getModule()); 4746 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4747 TaskPrivatesMapFnInfo); 4748 if (CGM.getLangOpts().Optimize) { 4749 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4750 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4751 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4752 } 4753 CodeGenFunction CGF(CGM); 4754 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4755 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4756 4757 // *privi = &.privates.privi; 4758 LValue Base = CGF.EmitLoadOfPointerLValue( 4759 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4760 TaskPrivatesArg.getType()->castAs<PointerType>()); 4761 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4762 Counter = 0; 4763 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4764 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4765 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4766 LValue RefLVal = 4767 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4768 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4769 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 4770 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 4771 ++Counter; 4772 } 4773 CGF.FinishFunction(); 4774 return TaskPrivatesMap; 4775 } 4776 4777 /// Emit initialization for private variables in task-based directives. 4778 static void emitPrivatesInit(CodeGenFunction &CGF, 4779 const OMPExecutableDirective &D, 4780 Address KmpTaskSharedsPtr, LValue TDBase, 4781 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4782 QualType SharedsTy, QualType SharedsPtrTy, 4783 const OMPTaskDataTy &Data, 4784 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4785 ASTContext &C = CGF.getContext(); 4786 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4787 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4788 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4789 ? OMPD_taskloop 4790 : OMPD_task; 4791 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4792 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4793 LValue SrcBase; 4794 bool IsTargetTask = 4795 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4796 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4797 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4798 // PointersArray and SizesArray. The original variables for these arrays are 4799 // not captured and we get their addresses explicitly. 
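// Conceptually, for 'firstprivate(a, b)' where 'a' is trivially copyable and
// 'b' has class type, the loop below emits initialization along these lines
// (a sketch):
//
//   tt->privates.a = src_shareds->a;           // bitwise copy
//   new (&tt->privates.b) B(src_shareds->b);   // copy constructor
//
// where 'src_shareds' is the shareds block of the original task (or of the
// source task when duplicating for a taskloop, i.e. when ForDup is true).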
4800 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 4801 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4802 SrcBase = CGF.MakeAddrLValue( 4803 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4804 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4805 SharedsTy); 4806 } 4807 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4808 for (const PrivateDataTy &Pair : Privates) { 4809 const VarDecl *VD = Pair.second.PrivateCopy; 4810 const Expr *Init = VD->getAnyInitializer(); 4811 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4812 !CGF.isTrivialInitializer(Init)))) { 4813 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4814 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4815 const VarDecl *OriginalVD = Pair.second.Original; 4816 // Check if the variable is the target-based BasePointersArray, 4817 // PointersArray or SizesArray. 4818 LValue SharedRefLValue; 4819 QualType Type = PrivateLValue.getType(); 4820 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4821 if (IsTargetTask && !SharedField) { 4822 assert(isa<ImplicitParamDecl>(OriginalVD) && 4823 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4824 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4825 ->getNumParams() == 0 && 4826 isa<TranslationUnitDecl>( 4827 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4828 ->getDeclContext()) && 4829 "Expected artificial target data variable."); 4830 SharedRefLValue = 4831 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4832 } else if (ForDup) { 4833 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4834 SharedRefLValue = CGF.MakeAddrLValue( 4835 Address(SharedRefLValue.getPointer(CGF), 4836 C.getDeclAlign(OriginalVD)), 4837 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4838 SharedRefLValue.getTBAAInfo()); 4839 } else { 4840 InlinedOpenMPRegionRAII Region( 4841 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 4842 /*HasCancel=*/false); 4843 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 4844 } 4845 if (Type->isArrayType()) { 4846 // Initialize firstprivate array. 4847 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4848 // Perform simple memcpy. 4849 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4850 } else { 4851 // Initialize firstprivate array using element-by-element 4852 // initialization. 4853 CGF.EmitOMPAggregateAssign( 4854 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 4855 Type, 4856 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4857 Address SrcElement) { 4858 // Clean up any temporaries needed by the initialization. 4859 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4860 InitScope.addPrivate( 4861 Elem, [SrcElement]() -> Address { return SrcElement; }); 4862 (void)InitScope.Privatize(); 4863 // Emit initialization for single element. 
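// For example, for 'firstprivate(s)' where 'S s[4]' has a nontrivial copy
// constructor, the loop generated by EmitOMPAggregateAssign initializes each
// element roughly as (a sketch):
//
//   for (size_t i = 0; i != 4; ++i)
//     new (&priv_s[i]) S(shared_s[i]);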
4864 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4865 CGF, &CapturesInfo); 4866 CGF.EmitAnyExprToMem(Init, DestElement, 4867 Init->getType().getQualifiers(), 4868 /*IsInitializer=*/false); 4869 }); 4870 } 4871 } else { 4872 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4873 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 4874 return SharedRefLValue.getAddress(CGF); 4875 }); 4876 (void)InitScope.Privatize(); 4877 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4878 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4879 /*capturedByInit=*/false); 4880 } 4881 } else { 4882 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4883 } 4884 } 4885 ++FI; 4886 } 4887 } 4888 4889 /// Check if duplication function is required for taskloops. 4890 static bool checkInitIsRequired(CodeGenFunction &CGF, 4891 ArrayRef<PrivateDataTy> Privates) { 4892 bool InitRequired = false; 4893 for (const PrivateDataTy &Pair : Privates) { 4894 const VarDecl *VD = Pair.second.PrivateCopy; 4895 const Expr *Init = VD->getAnyInitializer(); 4896 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4897 !CGF.isTrivialInitializer(Init)); 4898 if (InitRequired) 4899 break; 4900 } 4901 return InitRequired; 4902 } 4903 4904 4905 /// Emit task_dup function (for initialization of 4906 /// private/firstprivate/lastprivate vars and last_iter flag) 4907 /// \code 4908 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4909 /// lastpriv) { 4910 /// // setup lastprivate flag 4911 /// task_dst->last = lastpriv; 4912 /// // could be constructor calls here... 4913 /// } 4914 /// \endcode 4915 static llvm::Value * 4916 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4917 const OMPExecutableDirective &D, 4918 QualType KmpTaskTWithPrivatesPtrQTy, 4919 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4920 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4921 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4922 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4923 ASTContext &C = CGM.getContext(); 4924 FunctionArgList Args; 4925 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4926 KmpTaskTWithPrivatesPtrQTy, 4927 ImplicitParamDecl::Other); 4928 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4929 KmpTaskTWithPrivatesPtrQTy, 4930 ImplicitParamDecl::Other); 4931 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4932 ImplicitParamDecl::Other); 4933 Args.push_back(&DstArg); 4934 Args.push_back(&SrcArg); 4935 Args.push_back(&LastprivArg); 4936 const auto &TaskDupFnInfo = 4937 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4938 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 4939 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4940 auto *TaskDup = llvm::Function::Create( 4941 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4942 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4943 TaskDup->setDoesNotRecurse(); 4944 CodeGenFunction CGF(CGM); 4945 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4946 Loc); 4947 4948 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4949 CGF.GetAddrOfLocalVar(&DstArg), 4950 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4951 // task_dst->liter = lastpriv; 4952 if (WithLastIter) { 4953 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4954 LValue Base = 
CGF.EmitLValueForField( 4955 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4956 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4957 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4958 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4959 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4960 } 4961 4962 // Emit initial values for private copies (if any). 4963 assert(!Privates.empty()); 4964 Address KmpTaskSharedsPtr = Address::invalid(); 4965 if (!Data.FirstprivateVars.empty()) { 4966 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4967 CGF.GetAddrOfLocalVar(&SrcArg), 4968 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4969 LValue Base = CGF.EmitLValueForField( 4970 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4971 KmpTaskSharedsPtr = Address( 4972 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4973 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4974 KmpTaskTShareds)), 4975 Loc), 4976 CGF.getNaturalTypeAlignment(SharedsTy)); 4977 } 4978 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4979 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4980 CGF.FinishFunction(); 4981 return TaskDup; 4982 } 4983 4984 /// Checks if destructor function is required to be generated. 4985 /// \return true if cleanups are required, false otherwise. 4986 static bool 4987 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4988 bool NeedsCleanup = false; 4989 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4990 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4991 for (const FieldDecl *FD : PrivateRD->fields()) { 4992 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4993 if (NeedsCleanup) 4994 break; 4995 } 4996 return NeedsCleanup; 4997 } 4998 4999 CGOpenMPRuntime::TaskResultTy 5000 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 5001 const OMPExecutableDirective &D, 5002 llvm::Function *TaskFunction, QualType SharedsTy, 5003 Address Shareds, const OMPTaskDataTy &Data) { 5004 ASTContext &C = CGM.getContext(); 5005 llvm::SmallVector<PrivateDataTy, 4> Privates; 5006 // Aggregate privates and sort them by the alignment. 
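// For example, for 'private(c) firstprivate(d, i)' with 'char c', 'double d'
// and 'int i', the stable sort below orders the helpers as {d, i, c}, so the
// generated .kmp_privates.t record has decreasing member alignment and
// minimal padding (a sketch; alignments are target dependent):
//
//   struct .kmp_privates.t {
//     double d; // align 8
//     int i;    // align 4
//     char c;   // align 1
//   };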
5007 const auto *I = Data.PrivateCopies.begin(); 5008 for (const Expr *E : Data.PrivateVars) { 5009 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5010 Privates.emplace_back( 5011 C.getDeclAlign(VD), 5012 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5013 /*PrivateElemInit=*/nullptr)); 5014 ++I; 5015 } 5016 I = Data.FirstprivateCopies.begin(); 5017 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 5018 for (const Expr *E : Data.FirstprivateVars) { 5019 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5020 Privates.emplace_back( 5021 C.getDeclAlign(VD), 5022 PrivateHelpersTy( 5023 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5024 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 5025 ++I; 5026 ++IElemInitRef; 5027 } 5028 I = Data.LastprivateCopies.begin(); 5029 for (const Expr *E : Data.LastprivateVars) { 5030 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5031 Privates.emplace_back( 5032 C.getDeclAlign(VD), 5033 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5034 /*PrivateElemInit=*/nullptr)); 5035 ++I; 5036 } 5037 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 5038 return L.first > R.first; 5039 }); 5040 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 5041 // Build type kmp_routine_entry_t (if not built yet). 5042 emitKmpRoutineEntryT(KmpInt32Ty); 5043 // Build type kmp_task_t (if not built yet). 5044 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 5045 if (SavedKmpTaskloopTQTy.isNull()) { 5046 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5047 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5048 } 5049 KmpTaskTQTy = SavedKmpTaskloopTQTy; 5050 } else { 5051 assert((D.getDirectiveKind() == OMPD_task || 5052 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 5053 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 5054 "Expected taskloop, task or target directive"); 5055 if (SavedKmpTaskTQTy.isNull()) { 5056 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5057 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5058 } 5059 KmpTaskTQTy = SavedKmpTaskTQTy; 5060 } 5061 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5062 // Build particular struct kmp_task_t for the given task. 5063 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5064 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5065 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5066 QualType KmpTaskTWithPrivatesPtrQTy = 5067 C.getPointerType(KmpTaskTWithPrivatesQTy); 5068 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5069 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5070 KmpTaskTWithPrivatesTy->getPointerTo(); 5071 llvm::Value *KmpTaskTWithPrivatesTySize = 5072 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5073 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5074 5075 // Emit initial values for private copies (if any). 
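// Continuing the example above, the mapping helper produced by
// emitTaskPrivateMappingFunction for {d, i, c} would look roughly like this
// (a sketch; the parameter order follows the clause lists, not the sorted
// record layout):
//
//   void .omp_task_privates_map.(.kmp_privates.t *privs, char **c,
//                                double **d, int **i) {
//     *d = &privs->d; *i = &privs->i; *c = &privs->c;
//   }
//
// When the task has no privates, a null pointer is passed instead.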
5076 llvm::Value *TaskPrivatesMap = nullptr;
5077 llvm::Type *TaskPrivatesMapTy =
5078 std::next(TaskFunction->arg_begin(), 3)->getType();
5079 if (!Privates.empty()) {
5080 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5081 TaskPrivatesMap = emitTaskPrivateMappingFunction(
5082 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5083 FI->getType(), Privates);
5084 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5085 TaskPrivatesMap, TaskPrivatesMapTy);
5086 } else {
5087 TaskPrivatesMap = llvm::ConstantPointerNull::get(
5088 cast<llvm::PointerType>(TaskPrivatesMapTy));
5089 }
5090 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5091 // kmp_task_t *tt);
5092 llvm::Function *TaskEntry = emitProxyTaskFunction(
5093 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5094 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5095 TaskPrivatesMap);
5096 
5097 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5098 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5099 // kmp_routine_entry_t *task_entry);
5100 // Task flags. Format is taken from
5101 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5102 // description of kmp_tasking_flags struct.
5103 enum {
5104 TiedFlag = 0x1,
5105 FinalFlag = 0x2,
5106 DestructorsFlag = 0x8,
5107 PriorityFlag = 0x20,
5108 DetachableFlag = 0x40,
5109 };
5110 unsigned Flags = Data.Tied ? TiedFlag : 0;
5111 bool NeedsCleanup = false;
5112 if (!Privates.empty()) {
5113 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5114 if (NeedsCleanup)
5115 Flags = Flags | DestructorsFlag;
5116 }
5117 if (Data.Priority.getInt())
5118 Flags = Flags | PriorityFlag;
5119 if (D.hasClausesOfKind<OMPDetachClause>())
5120 Flags = Flags | DetachableFlag;
5121 llvm::Value *TaskFlags =
5122 Data.Final.getPointer()
5123 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5124 CGF.Builder.getInt32(FinalFlag),
5125 CGF.Builder.getInt32(/*C=*/0))
5126 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5127 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5128 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5129 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5130 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5131 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5132 TaskEntry, KmpRoutineEntryPtrTy)};
5133 llvm::Value *NewTask;
5134 if (D.hasClausesOfKind<OMPNowaitClause>()) {
5135 // Check if we have any device clause associated with the directive.
5136 const Expr *Device = nullptr;
5137 if (auto *C = D.getSingleClause<OMPDeviceClause>())
5138 Device = C->getDevice();
5139 // Emit the device ID if present; otherwise use the default value.
5140 llvm::Value *DeviceID;
5141 if (Device)
5142 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5143 CGF.Int64Ty, /*isSigned=*/true);
5144 else
5145 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5146 AllocArgs.push_back(DeviceID);
5147 NewTask = CGF.EmitRuntimeCall(
5148 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
5149 } else {
5150 NewTask = CGF.EmitRuntimeCall(
5151 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
5152 }
5153 // Emit detach clause initialization.
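// For example, for
//
//   omp_event_handle_t evt;
//   #pragma omp task detach(evt)
//   { ... }
//
// the runtime is asked for an allow-completion event tied to the new task
// and its handle is stored into 'evt', as sketched below: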
5154 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 5155 // task_descriptor); 5156 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 5157 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 5158 LValue EvtLVal = CGF.EmitLValue(Evt); 5159 5160 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 5161 // int gtid, kmp_task_t *task); 5162 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 5163 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 5164 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 5165 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 5166 createRuntimeFunction(OMPRTL__kmpc_task_allow_completion_event), 5167 {Loc, Tid, NewTask}); 5168 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 5169 Evt->getExprLoc()); 5170 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 5171 } 5172 llvm::Value *NewTaskNewTaskTTy = 5173 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5174 NewTask, KmpTaskTWithPrivatesPtrTy); 5175 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5176 KmpTaskTWithPrivatesQTy); 5177 LValue TDBase = 5178 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5179 // Fill the data in the resulting kmp_task_t record. 5180 // Copy shareds if there are any. 5181 Address KmpTaskSharedsPtr = Address::invalid(); 5182 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5183 KmpTaskSharedsPtr = 5184 Address(CGF.EmitLoadOfScalar( 5185 CGF.EmitLValueForField( 5186 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5187 KmpTaskTShareds)), 5188 Loc), 5189 CGF.getNaturalTypeAlignment(SharedsTy)); 5190 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5191 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5192 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5193 } 5194 // Emit initial values for private copies (if any). 5195 TaskResultTy Result; 5196 if (!Privates.empty()) { 5197 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5198 SharedsTy, SharedsPtrTy, Data, Privates, 5199 /*ForDup=*/false); 5200 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5201 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5202 Result.TaskDupFn = emitTaskDupFunction( 5203 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5204 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5205 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5206 } 5207 } 5208 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5209 enum { Priority = 0, Destructors = 1 }; 5210 // Provide pointer to function with destructors for privates. 5211 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5212 const RecordDecl *KmpCmplrdataUD = 5213 (*FI)->getType()->getAsUnionType()->getDecl(); 5214 if (NeedsCleanup) { 5215 llvm::Value *DestructorFn = emitDestructorsFunction( 5216 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5217 KmpTaskTWithPrivatesQTy); 5218 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5219 LValue DestructorsLV = CGF.EmitLValueForField( 5220 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5221 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5222 DestructorFn, KmpRoutineEntryPtrTy), 5223 DestructorsLV); 5224 } 5225 // Set priority. 
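// For example, for '#pragma omp task priority(p)' the evaluated value of 'p'
// is stored into the data2 union of the allocated task (a sketch):
//
//   new_task->data2.priority = p;
//
// The runtime is expected to consult this field only when the priority flag
// (0x20) was included in the allocation flags above.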
5226 if (Data.Priority.getInt()) { 5227 LValue Data2LV = CGF.EmitLValueForField( 5228 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5229 LValue PriorityLV = CGF.EmitLValueForField( 5230 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5231 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5232 } 5233 Result.NewTask = NewTask; 5234 Result.TaskEntry = TaskEntry; 5235 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5236 Result.TDBase = TDBase; 5237 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5238 return Result; 5239 } 5240 5241 namespace { 5242 /// Dependence kind for RTL. 5243 enum RTLDependenceKindTy { 5244 DepIn = 0x01, 5245 DepInOut = 0x3, 5246 DepMutexInOutSet = 0x4 5247 }; 5248 /// Fields ids in kmp_depend_info record. 5249 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5250 } // namespace 5251 5252 /// Translates internal dependency kind into the runtime kind. 5253 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 5254 RTLDependenceKindTy DepKind; 5255 switch (K) { 5256 case OMPC_DEPEND_in: 5257 DepKind = DepIn; 5258 break; 5259 // Out and InOut dependencies must use the same code. 5260 case OMPC_DEPEND_out: 5261 case OMPC_DEPEND_inout: 5262 DepKind = DepInOut; 5263 break; 5264 case OMPC_DEPEND_mutexinoutset: 5265 DepKind = DepMutexInOutSet; 5266 break; 5267 case OMPC_DEPEND_source: 5268 case OMPC_DEPEND_sink: 5269 case OMPC_DEPEND_depobj: 5270 case OMPC_DEPEND_unknown: 5271 llvm_unreachable("Unknown task dependence type"); 5272 } 5273 return DepKind; 5274 } 5275 5276 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 5277 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 5278 QualType &FlagsTy) { 5279 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5280 if (KmpDependInfoTy.isNull()) { 5281 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5282 KmpDependInfoRD->startDefinition(); 5283 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5284 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5285 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5286 KmpDependInfoRD->completeDefinition(); 5287 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5288 } 5289 } 5290 5291 std::pair<llvm::Value *, LValue> 5292 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 5293 SourceLocation Loc) { 5294 ASTContext &C = CGM.getContext(); 5295 QualType FlagsTy; 5296 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5297 RecordDecl *KmpDependInfoRD = 5298 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5299 LValue Base = CGF.EmitLoadOfPointerLValue( 5300 DepobjLVal.getAddress(CGF), 5301 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5302 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5303 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5304 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5305 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 5306 Base.getTBAAInfo()); 5307 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5308 Addr.getPointer(), 5309 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5310 LValue NumDepsBase = CGF.MakeAddrLValue( 5311 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 5312 Base.getBaseInfo(), Base.getTBAAInfo()); 5313 // NumDeps = deps[i].base_addr; 5314 LValue BaseAddrLVal = CGF.EmitLValueForField( 5315 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5316 
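// A depobj handle points at the first real element of its kmp_depend_info
// array; the element just in front of it (index -1, computed above) is a
// header whose base_addr field holds the number of dependencies (a sketch):
//
//   kmp_depend_info arr[N + 1];
//   arr[0].base_addr = N;  // header, addressed as deps[-1] below
//   omp_depend_t d = &arr[1];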
llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 5317 return std::make_pair(NumDeps, Base); 5318 } 5319 5320 namespace { 5321 /// Loop generator for OpenMP iterator expression. 5322 class OMPIteratorGeneratorScope final 5323 : public CodeGenFunction::OMPPrivateScope { 5324 CodeGenFunction &CGF; 5325 const OMPIteratorExpr *E = nullptr; 5326 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 5327 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 5328 OMPIteratorGeneratorScope() = delete; 5329 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 5330 5331 public: 5332 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 5333 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 5334 if (!E) 5335 return; 5336 SmallVector<llvm::Value *, 4> Uppers; 5337 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 5338 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 5339 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 5340 addPrivate(VD, [&CGF, VD]() { 5341 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 5342 }); 5343 const OMPIteratorHelperData &HelperData = E->getHelper(I); 5344 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 5345 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 5346 "counter.addr"); 5347 }); 5348 } 5349 Privatize(); 5350 5351 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 5352 const OMPIteratorHelperData &HelperData = E->getHelper(I); 5353 LValue CLVal = 5354 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 5355 HelperData.CounterVD->getType()); 5356 // Counter = 0; 5357 CGF.EmitStoreOfScalar( 5358 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 5359 CLVal); 5360 CodeGenFunction::JumpDest &ContDest = 5361 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 5362 CodeGenFunction::JumpDest &ExitDest = 5363 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 5364 // N = <number-of_iterations>; 5365 llvm::Value *N = Uppers[I]; 5366 // cont: 5367 // if (Counter < N) goto body; else goto exit; 5368 CGF.EmitBlock(ContDest.getBlock()); 5369 auto *CVal = 5370 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 5371 llvm::Value *Cmp = 5372 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 5373 ? 
CGF.Builder.CreateICmpSLT(CVal, N) 5374 : CGF.Builder.CreateICmpULT(CVal, N); 5375 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 5376 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 5377 // body: 5378 CGF.EmitBlock(BodyBB); 5379 // Iteri = Begini + Counter * Stepi; 5380 CGF.EmitIgnoredExpr(HelperData.Update); 5381 } 5382 } 5383 ~OMPIteratorGeneratorScope() { 5384 if (!E) 5385 return; 5386 for (unsigned I = E->numOfIterators(); I > 0; --I) { 5387 // Counter = Counter + 1; 5388 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 5389 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 5390 // goto cont; 5391 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 5392 // exit: 5393 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 5394 } 5395 } 5396 }; 5397 } // namespace 5398 5399 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 5400 llvm::PointerUnion<unsigned *, LValue *> Pos, 5401 const OMPTaskDataTy::DependData &Data, 5402 Address DependenciesArray) { 5403 CodeGenModule &CGM = CGF.CGM; 5404 ASTContext &C = CGM.getContext(); 5405 QualType FlagsTy; 5406 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5407 RecordDecl *KmpDependInfoRD = 5408 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5409 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5410 5411 OMPIteratorGeneratorScope IteratorScope( 5412 CGF, cast_or_null<OMPIteratorExpr>( 5413 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 5414 : nullptr)); 5415 for (const Expr *E : Data.DepExprs) { 5416 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 5417 llvm::Value *Addr; 5418 if (OASE) { 5419 const Expr *Base = OASE->getBase(); 5420 Addr = CGF.EmitScalarExpr(Base); 5421 } else { 5422 Addr = CGF.EmitLValue(E).getPointer(CGF); 5423 } 5424 llvm::Value *Size; 5425 QualType Ty = E->getType(); 5426 if (OASE) { 5427 Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 5428 for (const Expr *SE : OASE->getDimensions()) { 5429 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 5430 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 5431 CGF.getContext().getSizeType(), 5432 SE->getExprLoc()); 5433 Size = CGF.Builder.CreateNUWMul(Size, Sz); 5434 } 5435 } else if (const auto *ASE = 5436 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5437 LValue UpAddrLVal = 5438 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 5439 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 5440 UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 5441 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy); 5442 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5443 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5444 } else { 5445 Size = CGF.getTypeSize(Ty); 5446 } 5447 LValue Base; 5448 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 5449 Base = CGF.MakeAddrLValue( 5450 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 5451 } else { 5452 LValue &PosLVal = *Pos.get<LValue *>(); 5453 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 5454 Base = CGF.MakeAddrLValue( 5455 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), 5456 DependenciesArray.getAlignment()), 5457 KmpDependInfoTy); 5458 } 5459 // deps[i].base_addr = &<Dependencies[i].second>; 5460 LValue BaseAddrLVal = CGF.EmitLValueForField( 5461 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5462 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 5463 BaseAddrLVal); 5464 // deps[i].len = 
sizeof(<Dependencies[i].second>);
5465 LValue LenLVal = CGF.EmitLValueForField(
5466 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5467 CGF.EmitStoreOfScalar(Size, LenLVal);
5468 // deps[i].flags = <Dependencies[i].first>;
5469 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
5470 LValue FlagsLVal = CGF.EmitLValueForField(
5471 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5472 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5473 FlagsLVal);
5474 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
5475 ++(*P);
5476 } else {
5477 LValue &PosLVal = *Pos.get<LValue *>();
5478 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
5479 Idx = CGF.Builder.CreateNUWAdd(Idx,
5480 llvm::ConstantInt::get(Idx->getType(), 1));
5481 CGF.EmitStoreOfScalar(Idx, PosLVal);
5482 }
5483 }
5484 }
5485 
5486 static SmallVector<llvm::Value *, 4>
5487 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
5488 const OMPTaskDataTy::DependData &Data) {
5489 assert(Data.DepKind == OMPC_DEPEND_depobj &&
5490 "Expected depobj dependency kind.");
5491 SmallVector<llvm::Value *, 4> Sizes;
5492 SmallVector<LValue, 4> SizeLVals;
5493 ASTContext &C = CGF.getContext();
5494 QualType FlagsTy;
5495 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5496 RecordDecl *KmpDependInfoRD =
5497 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5498 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5499 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
5500 {
5501 OMPIteratorGeneratorScope IteratorScope(
5502 CGF, cast_or_null<OMPIteratorExpr>(
5503 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
5504 : nullptr));
5505 for (const Expr *E : Data.DepExprs) {
5506 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
5507 LValue Base = CGF.EmitLoadOfPointerLValue(
5508 DepobjLVal.getAddress(CGF),
5509 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5510 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5511 Base.getAddress(CGF), KmpDependInfoPtrT);
5512 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
5513 Base.getTBAAInfo());
5514 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5515 Addr.getPointer(),
5516 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5517 LValue NumDepsBase = CGF.MakeAddrLValue(
5518 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
5519 Base.getBaseInfo(), Base.getTBAAInfo());
5520 // NumDeps = deps[i].base_addr;
5521 LValue BaseAddrLVal = CGF.EmitLValueForField(
5522 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5523 llvm::Value *NumDeps =
5524 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
5525 LValue NumLVal = CGF.MakeAddrLValue(
5526 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
5527 C.getUIntPtrType());
5528 CGF.InitTempAlloca(NumLVal.getAddress(CGF),
5529 llvm::ConstantInt::get(CGF.IntPtrTy, 0));
5530 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
5531 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
5532 CGF.EmitStoreOfScalar(Add, NumLVal);
5533 SizeLVals.push_back(NumLVal);
5534 }
5535 }
5536 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
5537 llvm::Value *Size =
5538 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
5539 Sizes.push_back(Size);
5540 }
5541 return Sizes;
5542 }
5543 
5544 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
5545 LValue PosLVal,
5546 const
OMPTaskDataTy::DependData &Data, 5547 Address DependenciesArray) { 5548 assert(Data.DepKind == OMPC_DEPEND_depobj && 5549 "Expected depobj dependecy kind."); 5550 ASTContext &C = CGF.getContext(); 5551 QualType FlagsTy; 5552 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5553 RecordDecl *KmpDependInfoRD = 5554 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5555 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5556 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 5557 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 5558 { 5559 OMPIteratorGeneratorScope IteratorScope( 5560 CGF, cast_or_null<OMPIteratorExpr>( 5561 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 5562 : nullptr)); 5563 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 5564 const Expr *E = Data.DepExprs[I]; 5565 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 5566 LValue Base = CGF.EmitLoadOfPointerLValue( 5567 DepobjLVal.getAddress(CGF), 5568 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5569 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5570 Base.getAddress(CGF), KmpDependInfoPtrT); 5571 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 5572 Base.getTBAAInfo()); 5573 5574 // Get number of elements in a single depobj. 5575 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5576 Addr.getPointer(), 5577 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5578 LValue NumDepsBase = CGF.MakeAddrLValue( 5579 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 5580 Base.getBaseInfo(), Base.getTBAAInfo()); 5581 // NumDeps = deps[i].base_addr; 5582 LValue BaseAddrLVal = CGF.EmitLValueForField( 5583 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5584 llvm::Value *NumDeps = 5585 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 5586 5587 // memcopy dependency data. 5588 llvm::Value *Size = CGF.Builder.CreateNUWMul( 5589 ElSize, 5590 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 5591 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 5592 Address DepAddr = 5593 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 5594 DependenciesArray.getAlignment()); 5595 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 5596 5597 // Increase pos. 5598 // pos += size; 5599 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 5600 CGF.EmitStoreOfScalar(Add, PosLVal); 5601 } 5602 } 5603 } 5604 5605 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 5606 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 5607 SourceLocation Loc) { 5608 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 5609 return D.DepExprs.empty(); 5610 })) 5611 return std::make_pair(nullptr, Address::invalid()); 5612 // Process list of dependencies. 5613 ASTContext &C = CGM.getContext(); 5614 Address DependenciesArray = Address::invalid(); 5615 llvm::Value *NumOfElements = nullptr; 5616 unsigned NumDependencies = std::accumulate( 5617 Dependencies.begin(), Dependencies.end(), 0, 5618 [](unsigned V, const OMPTaskDataTy::DependData &D) { 5619 return D.DepKind == OMPC_DEPEND_depobj 5620 ? V 5621 : (V + (D.IteratorExpr ? 
0 : D.DepExprs.size())); 5622 }); 5623 QualType FlagsTy; 5624 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5625 bool HasDepobjDeps = false; 5626 bool HasRegularWithIterators = false; 5627 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 5628 llvm::Value *NumOfRegularWithIterators = 5629 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 5630 // Calculate number of depobj dependecies and regular deps with the iterators. 5631 for (const OMPTaskDataTy::DependData &D : Dependencies) { 5632 if (D.DepKind == OMPC_DEPEND_depobj) { 5633 SmallVector<llvm::Value *, 4> Sizes = 5634 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 5635 for (llvm::Value *Size : Sizes) { 5636 NumOfDepobjElements = 5637 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 5638 } 5639 HasDepobjDeps = true; 5640 continue; 5641 } 5642 // Include number of iterations, if any. 5643 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 5644 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5645 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5646 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 5647 NumOfRegularWithIterators = 5648 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 5649 } 5650 HasRegularWithIterators = true; 5651 continue; 5652 } 5653 } 5654 5655 QualType KmpDependInfoArrayTy; 5656 if (HasDepobjDeps || HasRegularWithIterators) { 5657 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 5658 /*isSigned=*/false); 5659 if (HasDepobjDeps) { 5660 NumOfElements = 5661 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 5662 } 5663 if (HasRegularWithIterators) { 5664 NumOfElements = 5665 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 5666 } 5667 OpaqueValueExpr OVE(Loc, 5668 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 5669 VK_RValue); 5670 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 5671 RValue::get(NumOfElements)); 5672 KmpDependInfoArrayTy = 5673 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 5674 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 5675 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 5676 // Properly emit variable-sized array. 5677 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 5678 ImplicitParamDecl::Other); 5679 CGF.EmitVarDecl(*PD); 5680 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 5681 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 5682 /*isSigned=*/false); 5683 } else { 5684 KmpDependInfoArrayTy = C.getConstantArrayType( 5685 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 5686 ArrayType::Normal, /*IndexTypeQuals=*/0); 5687 DependenciesArray = 5688 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5689 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 5690 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 5691 /*isSigned=*/false); 5692 } 5693 unsigned Pos = 0; 5694 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 5695 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 5696 Dependencies[I].IteratorExpr) 5697 continue; 5698 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 5699 DependenciesArray); 5700 } 5701 // Copy regular dependecies with iterators. 
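  // From here on a position counter kept in memory is needed: an 'iterator'
  // modifier expands one depend expression into a run-time-computed number of
  // elements, so the next free slot in the array cannot be a compile-time
  // index. Illustrative source form of such a clause (not from this file):
  //   #pragma omp task depend(iterator(i = 0 : n), in : a[i])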
5702 LValue PosLVal = CGF.MakeAddrLValue( 5703 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 5704 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 5705 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 5706 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 5707 !Dependencies[I].IteratorExpr) 5708 continue; 5709 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 5710 DependenciesArray); 5711 } 5712 // Copy final depobj arrays without iterators. 5713 if (HasDepobjDeps) { 5714 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 5715 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 5716 continue; 5717 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 5718 DependenciesArray); 5719 } 5720 } 5721 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5722 DependenciesArray, CGF.VoidPtrTy); 5723 return std::make_pair(NumOfElements, DependenciesArray); 5724 } 5725 5726 Address CGOpenMPRuntime::emitDepobjDependClause( 5727 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 5728 SourceLocation Loc) { 5729 if (Dependencies.DepExprs.empty()) 5730 return Address::invalid(); 5731 // Process list of dependencies. 5732 ASTContext &C = CGM.getContext(); 5733 Address DependenciesArray = Address::invalid(); 5734 unsigned NumDependencies = Dependencies.DepExprs.size(); 5735 QualType FlagsTy; 5736 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5737 RecordDecl *KmpDependInfoRD = 5738 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5739 5740 llvm::Value *Size; 5741 // Define type kmp_depend_info[<Dependencies.size()>]; 5742 // For depobj reserve one extra element to store the number of elements. 5743 // It is required to handle depobj(x) update(in) construct. 5744 // kmp_depend_info[<Dependencies.size()>] deps; 5745 llvm::Value *NumDepsVal; 5746 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 5747 if (const auto *IE = 5748 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 5749 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 5750 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5751 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5752 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5753 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5754 } 5755 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5756 NumDepsVal); 5757 CharUnits SizeInBytes = 5758 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5759 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5760 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5761 NumDepsVal = 5762 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5763 } else { 5764 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5765 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5766 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5767 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5768 Size = CGM.getSize(Sz.alignTo(Align)); 5769 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5770 } 5771 // Need to allocate on the dynamic memory. 5772 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5773 // Use default allocator. 
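// Passing a null allocator handle makes the runtime fall back to its default
  // allocator. Rough shape of the call emitted just below (illustrative):
  //   void *addr = __kmpc_alloc(gtid, <size-in-bytes>, /*allocator=*/nullptr);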
llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
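  // Rough shape of the emitted update loop, as pseudo-C (illustrative):
  //   kmp_depend_info *el = &deps[0];
  //   do {
  //     el->flags = <new dependency kind>;
  //     ++el;
  //   } while (el != &deps[numDeps]);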
5849 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5850 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5851 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5852 CGF.EmitBlock(BodyBB); 5853 llvm::PHINode *ElementPHI = 5854 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5855 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5856 Begin = Address(ElementPHI, Begin.getAlignment()); 5857 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5858 Base.getTBAAInfo()); 5859 // deps[i].flags = NewDepKind; 5860 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5861 LValue FlagsLVal = CGF.EmitLValueForField( 5862 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5863 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5864 FlagsLVal); 5865 5866 // Shift the address forward by one element. 5867 Address ElementNext = 5868 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5869 ElementPHI->addIncoming(ElementNext.getPointer(), 5870 CGF.Builder.GetInsertBlock()); 5871 llvm::Value *IsEmpty = 5872 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5873 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5874 // Done. 5875 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5876 } 5877 5878 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5879 const OMPExecutableDirective &D, 5880 llvm::Function *TaskFunction, 5881 QualType SharedsTy, Address Shareds, 5882 const Expr *IfCond, 5883 const OMPTaskDataTy &Data) { 5884 if (!CGF.HaveInsertPoint()) 5885 return; 5886 5887 TaskResultTy Result = 5888 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5889 llvm::Value *NewTask = Result.NewTask; 5890 llvm::Function *TaskEntry = Result.TaskEntry; 5891 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5892 LValue TDBase = Result.TDBase; 5893 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5894 // Process list of dependences. 5895 Address DependenciesArray = Address::invalid(); 5896 llvm::Value *NumOfElements; 5897 std::tie(NumOfElements, DependenciesArray) = 5898 emitDependClause(CGF, Data.Dependences, Loc); 5899 5900 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5901 // libcall. 
5902 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5903 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5904 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5905 // list is not empty 5906 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5907 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5908 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5909 llvm::Value *DepTaskArgs[7]; 5910 if (!Data.Dependences.empty()) { 5911 DepTaskArgs[0] = UpLoc; 5912 DepTaskArgs[1] = ThreadID; 5913 DepTaskArgs[2] = NewTask; 5914 DepTaskArgs[3] = NumOfElements; 5915 DepTaskArgs[4] = DependenciesArray.getPointer(); 5916 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5917 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5918 } 5919 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5920 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5921 if (!Data.Tied) { 5922 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5923 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5924 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5925 } 5926 if (!Data.Dependences.empty()) { 5927 CGF.EmitRuntimeCall( 5928 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5929 } else { 5930 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5931 TaskArgs); 5932 } 5933 // Check if parent region is untied and build return for untied task; 5934 if (auto *Region = 5935 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5936 Region->emitUntiedSwitch(CGF); 5937 }; 5938 5939 llvm::Value *DepWaitTaskArgs[6]; 5940 if (!Data.Dependences.empty()) { 5941 DepWaitTaskArgs[0] = UpLoc; 5942 DepWaitTaskArgs[1] = ThreadID; 5943 DepWaitTaskArgs[2] = NumOfElements; 5944 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5945 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5946 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5947 } 5948 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5949 &Data, &DepWaitTaskArgs, 5950 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5951 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5952 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5953 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5954 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5955 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5956 // is specified. 
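// The 'else' branch below implements the undeferred task that a false 'if'
    // clause requires: wait for the task's dependences first, then run the
    // task body immediately on the encountering thread.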
5957 if (!Data.Dependences.empty()) 5958 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5959 DepWaitTaskArgs); 5960 // Call proxy_task_entry(gtid, new_task); 5961 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5962 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5963 Action.Enter(CGF); 5964 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5965 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5966 OutlinedFnArgs); 5967 }; 5968 5969 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5970 // kmp_task_t *new_task); 5971 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5972 // kmp_task_t *new_task); 5973 RegionCodeGenTy RCG(CodeGen); 5974 CommonActionTy Action( 5975 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5976 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5977 RCG.setAction(Action); 5978 RCG(CGF); 5979 }; 5980 5981 if (IfCond) { 5982 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5983 } else { 5984 RegionCodeGenTy ThenRCG(ThenCodeGen); 5985 ThenRCG(CGF); 5986 } 5987 } 5988 5989 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5990 const OMPLoopDirective &D, 5991 llvm::Function *TaskFunction, 5992 QualType SharedsTy, Address Shareds, 5993 const Expr *IfCond, 5994 const OMPTaskDataTy &Data) { 5995 if (!CGF.HaveInsertPoint()) 5996 return; 5997 TaskResultTy Result = 5998 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5999 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 6000 // libcall. 6001 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 6002 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 6003 // sched, kmp_uint64 grainsize, void *task_dup); 6004 llvm::Value *ThreadID = getThreadID(CGF, Loc); 6005 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 6006 llvm::Value *IfVal; 6007 if (IfCond) { 6008 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 6009 /*isSigned=*/true); 6010 } else { 6011 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 6012 } 6013 6014 LValue LBLVal = CGF.EmitLValueForField( 6015 Result.TDBase, 6016 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 6017 const auto *LBVar = 6018 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 6019 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 6020 LBLVal.getQuals(), 6021 /*IsInitializer=*/true); 6022 LValue UBLVal = CGF.EmitLValueForField( 6023 Result.TDBase, 6024 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 6025 const auto *UBVar = 6026 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 6027 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 6028 UBLVal.getQuals(), 6029 /*IsInitializer=*/true); 6030 LValue StLVal = CGF.EmitLValueForField( 6031 Result.TDBase, 6032 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 6033 const auto *StVar = 6034 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 6035 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 6036 StLVal.getQuals(), 6037 /*IsInitializer=*/true); 6038 // Store reductions address. 
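  // The 'reductions' field of the emitted kmp_task_t carries the taskgroup's
  // reduction descriptor (as returned by the taskred init call); it is
  // null-initialized when the taskloop has no 'reduction' clause.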
6039 LValue RedLVal = CGF.EmitLValueForField( 6040 Result.TDBase, 6041 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 6042 if (Data.Reductions) { 6043 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 6044 } else { 6045 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 6046 CGF.getContext().VoidPtrTy); 6047 } 6048 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 6049 llvm::Value *TaskArgs[] = { 6050 UpLoc, 6051 ThreadID, 6052 Result.NewTask, 6053 IfVal, 6054 LBLVal.getPointer(CGF), 6055 UBLVal.getPointer(CGF), 6056 CGF.EmitLoadOfScalar(StLVal, Loc), 6057 llvm::ConstantInt::getSigned( 6058 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 6059 llvm::ConstantInt::getSigned( 6060 CGF.IntTy, Data.Schedule.getPointer() 6061 ? Data.Schedule.getInt() ? NumTasks : Grainsize 6062 : NoSchedule), 6063 Data.Schedule.getPointer() 6064 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 6065 /*isSigned=*/false) 6066 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 6067 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6068 Result.TaskDupFn, CGF.VoidPtrTy) 6069 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 6070 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 6071 } 6072 6073 /// Emit reduction operation for each element of array (required for 6074 /// array sections) LHS op = RHS. 6075 /// \param Type Type of array. 6076 /// \param LHSVar Variable on the left side of the reduction operation 6077 /// (references element of array in original variable). 6078 /// \param RHSVar Variable on the right side of the reduction operation 6079 /// (references element of array in original variable). 6080 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 6081 /// RHSVar. 6082 static void EmitOMPAggregateReduction( 6083 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 6084 const VarDecl *RHSVar, 6085 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 6086 const Expr *, const Expr *)> &RedOpGen, 6087 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 6088 const Expr *UpExpr = nullptr) { 6089 // Perform element-by-element initialization. 6090 QualType ElementTy; 6091 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 6092 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 6093 6094 // Drill down to the base element type on both arrays. 6095 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 6096 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 6097 6098 llvm::Value *RHSBegin = RHSAddr.getPointer(); 6099 llvm::Value *LHSBegin = LHSAddr.getPointer(); 6100 // Cast from pointer to array type to pointer to single element. 6101 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 6102 // The basic structure here is a while-do loop. 6103 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 6104 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 6105 llvm::Value *IsEmpty = 6106 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 6107 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 6108 6109 // Enter the loop body, making that address the current address. 
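  // Rough shape of the emitted element loop, as pseudo-C (illustrative); the
  // emptiness check above guards entry into the first iteration:
  //   do {
  //     *lhs = RedOp(*lhs, *rhs);
  //     ++lhs; ++rhs;
  //   } while (lhs != lhsEnd);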
6110 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 6111 CGF.EmitBlock(BodyBB); 6112 6113 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 6114 6115 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 6116 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 6117 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 6118 Address RHSElementCurrent = 6119 Address(RHSElementPHI, 6120 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 6121 6122 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 6123 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 6124 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 6125 Address LHSElementCurrent = 6126 Address(LHSElementPHI, 6127 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 6128 6129 // Emit copy. 6130 CodeGenFunction::OMPPrivateScope Scope(CGF); 6131 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 6132 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 6133 Scope.Privatize(); 6134 RedOpGen(CGF, XExpr, EExpr, UpExpr); 6135 Scope.ForceCleanup(); 6136 6137 // Shift the address forward by one element. 6138 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 6139 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 6140 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 6141 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 6142 // Check whether we've reached the end. 6143 llvm::Value *Done = 6144 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 6145 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 6146 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 6147 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 6148 6149 // Done. 6150 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 6151 } 6152 6153 /// Emit reduction combiner. If the combiner is a simple expression emit it as 6154 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 6155 /// UDR combiner function. 
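/// For instance (illustrative), given
/// \code
/// #pragma omp declare reduction(merge : T : omp_out = combine(omp_out, omp_in))
/// \endcode
/// the reduction op arrives here as a call whose callee is an
/// OpaqueValueExpr; the mapping below rebinds that callee to the emitted UDR
/// combiner function.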
6156 static void emitReductionCombiner(CodeGenFunction &CGF, 6157 const Expr *ReductionOp) { 6158 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 6159 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 6160 if (const auto *DRE = 6161 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 6162 if (const auto *DRD = 6163 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 6164 std::pair<llvm::Function *, llvm::Function *> Reduction = 6165 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 6166 RValue Func = RValue::get(Reduction.first); 6167 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 6168 CGF.EmitIgnoredExpr(ReductionOp); 6169 return; 6170 } 6171 CGF.EmitIgnoredExpr(ReductionOp); 6172 } 6173 6174 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 6175 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 6176 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 6177 ArrayRef<const Expr *> ReductionOps) { 6178 ASTContext &C = CGM.getContext(); 6179 6180 // void reduction_func(void *LHSArg, void *RHSArg); 6181 FunctionArgList Args; 6182 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6183 ImplicitParamDecl::Other); 6184 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6185 ImplicitParamDecl::Other); 6186 Args.push_back(&LHSArg); 6187 Args.push_back(&RHSArg); 6188 const auto &CGFI = 6189 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6190 std::string Name = getName({"omp", "reduction", "reduction_func"}); 6191 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 6192 llvm::GlobalValue::InternalLinkage, Name, 6193 &CGM.getModule()); 6194 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 6195 Fn->setDoesNotRecurse(); 6196 CodeGenFunction CGF(CGM); 6197 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 6198 6199 // Dst = (void*[n])(LHSArg); 6200 // Src = (void*[n])(RHSArg); 6201 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6202 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 6203 ArgsType), CGF.getPointerAlign()); 6204 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6205 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 6206 ArgsType), CGF.getPointerAlign()); 6207 6208 // ... 6209 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 6210 // ... 6211 CodeGenFunction::OMPPrivateScope Scope(CGF); 6212 auto IPriv = Privates.begin(); 6213 unsigned Idx = 0; 6214 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 6215 const auto *RHSVar = 6216 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 6217 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 6218 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 6219 }); 6220 const auto *LHSVar = 6221 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 6222 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 6223 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 6224 }); 6225 QualType PrivTy = (*IPriv)->getType(); 6226 if (PrivTy->isVariablyModifiedType()) { 6227 // Get array size and emit VLA type. 
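      // The size of a variably-modified item travels in the next pointer-sized
      // slot of the void*[] argument array (stored there by the caller);
      // decode it and bind it to the VLA size expression before emitting the
      // type.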
6228 ++Idx; 6229 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 6230 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 6231 const VariableArrayType *VLA = 6232 CGF.getContext().getAsVariableArrayType(PrivTy); 6233 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 6234 CodeGenFunction::OpaqueValueMapping OpaqueMap( 6235 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 6236 CGF.EmitVariablyModifiedType(PrivTy); 6237 } 6238 } 6239 Scope.Privatize(); 6240 IPriv = Privates.begin(); 6241 auto ILHS = LHSExprs.begin(); 6242 auto IRHS = RHSExprs.begin(); 6243 for (const Expr *E : ReductionOps) { 6244 if ((*IPriv)->getType()->isArrayType()) { 6245 // Emit reduction for array section. 6246 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 6247 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 6248 EmitOMPAggregateReduction( 6249 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 6250 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 6251 emitReductionCombiner(CGF, E); 6252 }); 6253 } else { 6254 // Emit reduction for array subscript or single variable. 6255 emitReductionCombiner(CGF, E); 6256 } 6257 ++IPriv; 6258 ++ILHS; 6259 ++IRHS; 6260 } 6261 Scope.ForceCleanup(); 6262 CGF.FinishFunction(); 6263 return Fn; 6264 } 6265 6266 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 6267 const Expr *ReductionOp, 6268 const Expr *PrivateRef, 6269 const DeclRefExpr *LHS, 6270 const DeclRefExpr *RHS) { 6271 if (PrivateRef->getType()->isArrayType()) { 6272 // Emit reduction for array section. 6273 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 6274 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 6275 EmitOMPAggregateReduction( 6276 CGF, PrivateRef->getType(), LHSVar, RHSVar, 6277 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 6278 emitReductionCombiner(CGF, ReductionOp); 6279 }); 6280 } else { 6281 // Emit reduction for array subscript or single variable. 6282 emitReductionCombiner(CGF, ReductionOp); 6283 } 6284 } 6285 6286 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 6287 ArrayRef<const Expr *> Privates, 6288 ArrayRef<const Expr *> LHSExprs, 6289 ArrayRef<const Expr *> RHSExprs, 6290 ArrayRef<const Expr *> ReductionOps, 6291 ReductionOptionsTy Options) { 6292 if (!CGF.HaveInsertPoint()) 6293 return; 6294 6295 bool WithNowait = Options.WithNowait; 6296 bool SimpleReduction = Options.SimpleReduction; 6297 6298 // Next code should be emitted for reduction: 6299 // 6300 // static kmp_critical_name lock = { 0 }; 6301 // 6302 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 6303 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 6304 // ... 6305 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 6306 // *(Type<n>-1*)rhs[<n>-1]); 6307 // } 6308 // 6309 // ... 6310 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 6311 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 6312 // RedList, reduce_func, &<lock>)) { 6313 // case 1: 6314 // ... 6315 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 6316 // ... 6317 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 6318 // break; 6319 // case 2: 6320 // ... 6321 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 6322 // ... 
6323 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 6324 // break; 6325 // default:; 6326 // } 6327 // 6328 // if SimpleReduction is true, only the next code is generated: 6329 // ... 6330 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 6331 // ... 6332 6333 ASTContext &C = CGM.getContext(); 6334 6335 if (SimpleReduction) { 6336 CodeGenFunction::RunCleanupsScope Scope(CGF); 6337 auto IPriv = Privates.begin(); 6338 auto ILHS = LHSExprs.begin(); 6339 auto IRHS = RHSExprs.begin(); 6340 for (const Expr *E : ReductionOps) { 6341 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 6342 cast<DeclRefExpr>(*IRHS)); 6343 ++IPriv; 6344 ++ILHS; 6345 ++IRHS; 6346 } 6347 return; 6348 } 6349 6350 // 1. Build a list of reduction variables. 6351 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 6352 auto Size = RHSExprs.size(); 6353 for (const Expr *E : Privates) { 6354 if (E->getType()->isVariablyModifiedType()) 6355 // Reserve place for array size. 6356 ++Size; 6357 } 6358 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 6359 QualType ReductionArrayTy = 6360 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 6361 /*IndexTypeQuals=*/0); 6362 Address ReductionList = 6363 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 6364 auto IPriv = Privates.begin(); 6365 unsigned Idx = 0; 6366 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 6367 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 6368 CGF.Builder.CreateStore( 6369 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6370 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), 6371 Elem); 6372 if ((*IPriv)->getType()->isVariablyModifiedType()) { 6373 // Store array size. 6374 ++Idx; 6375 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 6376 llvm::Value *Size = CGF.Builder.CreateIntCast( 6377 CGF.getVLASize( 6378 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 6379 .NumElts, 6380 CGF.SizeTy, /*isSigned=*/false); 6381 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 6382 Elem); 6383 } 6384 } 6385 6386 // 2. Emit reduce_func(). 6387 llvm::Function *ReductionFn = emitReductionFunction( 6388 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 6389 LHSExprs, RHSExprs, ReductionOps); 6390 6391 // 3. Create static kmp_critical_name lock = { 0 }; 6392 std::string Name = getName({"reduction"}); 6393 llvm::Value *Lock = getCriticalRegionLock(Name); 6394 6395 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 6396 // RedList, reduce_func, &<lock>); 6397 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 6398 llvm::Value *ThreadId = getThreadID(CGF, Loc); 6399 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 6400 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6401 ReductionList.getPointer(), CGF.VoidPtrTy); 6402 llvm::Value *Args[] = { 6403 IdentTLoc, // ident_t *<loc> 6404 ThreadId, // i32 <gtid> 6405 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 6406 ReductionArrayTySize, // size_type sizeof(RedList) 6407 RL, // void *RedList 6408 ReductionFn, // void (*) (void *, void *) <reduce_func> 6409 Lock // kmp_critical_name *&<lock> 6410 }; 6411 llvm::Value *Res = CGF.EmitRuntimeCall( 6412 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 6413 : OMPRTL__kmpc_reduce), 6414 Args); 6415 6416 // 5. 
Build switch(res) 6417 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 6418 llvm::SwitchInst *SwInst = 6419 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 6420 6421 // 6. Build case 1: 6422 // ... 6423 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 6424 // ... 6425 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 6426 // break; 6427 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 6428 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 6429 CGF.EmitBlock(Case1BB); 6430 6431 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 6432 llvm::Value *EndArgs[] = { 6433 IdentTLoc, // ident_t *<loc> 6434 ThreadId, // i32 <gtid> 6435 Lock // kmp_critical_name *&<lock> 6436 }; 6437 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 6438 CodeGenFunction &CGF, PrePostActionTy &Action) { 6439 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6440 auto IPriv = Privates.begin(); 6441 auto ILHS = LHSExprs.begin(); 6442 auto IRHS = RHSExprs.begin(); 6443 for (const Expr *E : ReductionOps) { 6444 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 6445 cast<DeclRefExpr>(*IRHS)); 6446 ++IPriv; 6447 ++ILHS; 6448 ++IRHS; 6449 } 6450 }; 6451 RegionCodeGenTy RCG(CodeGen); 6452 CommonActionTy Action( 6453 nullptr, llvm::None, 6454 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 6455 : OMPRTL__kmpc_end_reduce), 6456 EndArgs); 6457 RCG.setAction(Action); 6458 RCG(CGF); 6459 6460 CGF.EmitBranch(DefaultBB); 6461 6462 // 7. Build case 2: 6463 // ... 6464 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 6465 // ... 6466 // break; 6467 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 6468 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 6469 CGF.EmitBlock(Case2BB); 6470 6471 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 6472 CodeGenFunction &CGF, PrePostActionTy &Action) { 6473 auto ILHS = LHSExprs.begin(); 6474 auto IRHS = RHSExprs.begin(); 6475 auto IPriv = Privates.begin(); 6476 for (const Expr *E : ReductionOps) { 6477 const Expr *XExpr = nullptr; 6478 const Expr *EExpr = nullptr; 6479 const Expr *UpExpr = nullptr; 6480 BinaryOperatorKind BO = BO_Comma; 6481 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 6482 if (BO->getOpcode() == BO_Assign) { 6483 XExpr = BO->getLHS(); 6484 UpExpr = BO->getRHS(); 6485 } 6486 } 6487 // Try to emit update expression as a simple atomic. 6488 const Expr *RHSExpr = UpExpr; 6489 if (RHSExpr) { 6490 // Analyze RHS part of the whole expression. 6491 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 6492 RHSExpr->IgnoreParenImpCasts())) { 6493 // If this is a conditional operator, analyze its condition for 6494 // min/max reduction operator. 
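          // E.g. a 'min' reduction is expressed as 'x = x < e ? x : e'
          // (illustrative), so the comparison sits in the condition and its
          // opcode is what the atomic update below is built from.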
6495 RHSExpr = ACO->getCond(); 6496 } 6497 if (const auto *BORHS = 6498 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 6499 EExpr = BORHS->getRHS(); 6500 BO = BORHS->getOpcode(); 6501 } 6502 } 6503 if (XExpr) { 6504 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 6505 auto &&AtomicRedGen = [BO, VD, 6506 Loc](CodeGenFunction &CGF, const Expr *XExpr, 6507 const Expr *EExpr, const Expr *UpExpr) { 6508 LValue X = CGF.EmitLValue(XExpr); 6509 RValue E; 6510 if (EExpr) 6511 E = CGF.EmitAnyExpr(EExpr); 6512 CGF.EmitOMPAtomicSimpleUpdateExpr( 6513 X, E, BO, /*IsXLHSInRHSPart=*/true, 6514 llvm::AtomicOrdering::Monotonic, Loc, 6515 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 6516 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6517 PrivateScope.addPrivate( 6518 VD, [&CGF, VD, XRValue, Loc]() { 6519 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 6520 CGF.emitOMPSimpleStore( 6521 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 6522 VD->getType().getNonReferenceType(), Loc); 6523 return LHSTemp; 6524 }); 6525 (void)PrivateScope.Privatize(); 6526 return CGF.EmitAnyExpr(UpExpr); 6527 }); 6528 }; 6529 if ((*IPriv)->getType()->isArrayType()) { 6530 // Emit atomic reduction for array section. 6531 const auto *RHSVar = 6532 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 6533 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 6534 AtomicRedGen, XExpr, EExpr, UpExpr); 6535 } else { 6536 // Emit atomic reduction for array subscript or single variable. 6537 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 6538 } 6539 } else { 6540 // Emit as a critical region. 6541 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 6542 const Expr *, const Expr *) { 6543 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6544 std::string Name = RT.getName({"atomic_reduction"}); 6545 RT.emitCriticalRegion( 6546 CGF, Name, 6547 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 6548 Action.Enter(CGF); 6549 emitReductionCombiner(CGF, E); 6550 }, 6551 Loc); 6552 }; 6553 if ((*IPriv)->getType()->isArrayType()) { 6554 const auto *LHSVar = 6555 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 6556 const auto *RHSVar = 6557 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 6558 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 6559 CritRedGen); 6560 } else { 6561 CritRedGen(CGF, nullptr, nullptr, nullptr); 6562 } 6563 } 6564 ++ILHS; 6565 ++IRHS; 6566 ++IPriv; 6567 } 6568 }; 6569 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 6570 if (!WithNowait) { 6571 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 6572 llvm::Value *EndArgs[] = { 6573 IdentTLoc, // ident_t *<loc> 6574 ThreadId, // i32 <gtid> 6575 Lock // kmp_critical_name *&<lock> 6576 }; 6577 CommonActionTy Action(nullptr, llvm::None, 6578 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 6579 EndArgs); 6580 AtomicRCG.setAction(Action); 6581 AtomicRCG(CGF); 6582 } else { 6583 AtomicRCG(CGF); 6584 } 6585 6586 CGF.EmitBranch(DefaultBB); 6587 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 6588 } 6589 6590 /// Generates unique name for artificial threadprivate variables. 6591 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 6592 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 6593 const Expr *Ref) { 6594 SmallString<256> Buffer; 6595 llvm::raw_svector_ostream Out(Buffer); 6596 const clang::DeclRefExpr *DE; 6597 const VarDecl *D = ::getBaseDecl(Ref, DE); 6598 if (!D) 6599 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 6600 D = D->getCanonicalDecl(); 6601 std::string Name = CGM.getOpenMPRuntime().getName( 6602 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 6603 Out << Prefix << Name << "_" 6604 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 6605 return std::string(Out.str()); 6606 } 6607 6608 /// Emits reduction initializer function: 6609 /// \code 6610 /// void @.red_init(void* %arg, void* %orig) { 6611 /// %0 = bitcast void* %arg to <type>* 6612 /// store <type> <init>, <type>* %0 6613 /// ret void 6614 /// } 6615 /// \endcode 6616 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6617 SourceLocation Loc, 6618 ReductionCodeGen &RCG, unsigned N) { 6619 ASTContext &C = CGM.getContext(); 6620 QualType VoidPtrTy = C.VoidPtrTy; 6621 VoidPtrTy.addRestrict(); 6622 FunctionArgList Args; 6623 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 6624 ImplicitParamDecl::Other); 6625 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, 6626 ImplicitParamDecl::Other); 6627 Args.emplace_back(&Param); 6628 Args.emplace_back(&ParamOrig); 6629 const auto &FnInfo = 6630 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6631 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6632 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6633 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6634 Name, &CGM.getModule()); 6635 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6636 Fn->setDoesNotRecurse(); 6637 CodeGenFunction CGF(CGM); 6638 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6639 Address PrivateAddr = CGF.EmitLoadOfPointer( 6640 CGF.GetAddrOfLocalVar(&Param), 6641 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6642 llvm::Value *Size = nullptr; 6643 // If the size of the reduction item is non-constant, load it from global 6644 // threadprivate variable. 
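  // (That artificial threadprivate variable is written by
  // emitTaskReductionFixups(), which uses the same unique-name scheme.)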
if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If the initializer uses the initializer from the 'declare reduction'
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
6716 if (RCG.getSizes(N).second) { 6717 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6718 CGF, CGM.getContext().getSizeType(), 6719 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6720 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6721 CGM.getContext().getSizeType(), Loc); 6722 } 6723 RCG.emitAggregateType(CGF, N, Size); 6724 // Remap lhs and rhs variables to the addresses of the function arguments. 6725 // %lhs = bitcast void* %arg0 to <type>* 6726 // %rhs = bitcast void* %arg1 to <type>* 6727 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6728 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6729 // Pull out the pointer to the variable. 6730 Address PtrAddr = CGF.EmitLoadOfPointer( 6731 CGF.GetAddrOfLocalVar(&ParamInOut), 6732 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6733 return CGF.Builder.CreateElementBitCast( 6734 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6735 }); 6736 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6737 // Pull out the pointer to the variable. 6738 Address PtrAddr = CGF.EmitLoadOfPointer( 6739 CGF.GetAddrOfLocalVar(&ParamIn), 6740 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6741 return CGF.Builder.CreateElementBitCast( 6742 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6743 }); 6744 PrivateScope.Privatize(); 6745 // Emit the combiner body: 6746 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6747 // store <type> %2, <type>* %lhs 6748 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6749 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6750 cast<DeclRefExpr>(RHS)); 6751 CGF.FinishFunction(); 6752 return Fn; 6753 } 6754 6755 /// Emits reduction finalizer function: 6756 /// \code 6757 /// void @.red_fini(void* %arg) { 6758 /// %0 = bitcast void* %arg to <type>* 6759 /// <destroy>(<type>* %0) 6760 /// ret void 6761 /// } 6762 /// \endcode 6763 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6764 SourceLocation Loc, 6765 ReductionCodeGen &RCG, unsigned N) { 6766 if (!RCG.needCleanups(N)) 6767 return nullptr; 6768 ASTContext &C = CGM.getContext(); 6769 FunctionArgList Args; 6770 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6771 ImplicitParamDecl::Other); 6772 Args.emplace_back(&Param); 6773 const auto &FnInfo = 6774 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6775 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6776 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6777 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6778 Name, &CGM.getModule()); 6779 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6780 Fn->setDoesNotRecurse(); 6781 CodeGenFunction CGF(CGM); 6782 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6783 Address PrivateAddr = CGF.EmitLoadOfPointer( 6784 CGF.GetAddrOfLocalVar(&Param), 6785 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6786 llvm::Value *Size = nullptr; 6787 // If the size of the reduction item is non-constant, load it from global 6788 // threadprivate variable. 
6789 if (RCG.getSizes(N).second) { 6790 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6791 CGF, CGM.getContext().getSizeType(), 6792 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6793 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6794 CGM.getContext().getSizeType(), Loc); 6795 } 6796 RCG.emitAggregateType(CGF, N, Size); 6797 // Emit the finalizer body: 6798 // <destroy>(<type>* %0) 6799 RCG.emitCleanups(CGF, N, PrivateAddr); 6800 CGF.FinishFunction(Loc); 6801 return Fn; 6802 } 6803 6804 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6805 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6806 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6807 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6808 return nullptr; 6809 6810 // Build typedef struct: 6811 // kmp_taskred_input { 6812 // void *reduce_shar; // shared reduction item 6813 // void *reduce_orig; // original reduction item used for initialization 6814 // size_t reduce_size; // size of data item 6815 // void *reduce_init; // data initialization routine 6816 // void *reduce_fini; // data finalization routine 6817 // void *reduce_comb; // data combiner routine 6818 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6819 // } kmp_taskred_input_t; 6820 ASTContext &C = CGM.getContext(); 6821 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); 6822 RD->startDefinition(); 6823 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6824 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6825 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6826 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6827 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6828 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6829 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6830 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6831 RD->completeDefinition(); 6832 QualType RDType = C.getRecordType(RD); 6833 unsigned Size = Data.ReductionVars.size(); 6834 llvm::APInt ArraySize(/*numBits=*/64, Size); 6835 QualType ArrayRDType = C.getConstantArrayType( 6836 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6837 // kmp_task_red_input_t .rd_input.[Size]; 6838 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6839 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, 6840 Data.ReductionCopies, Data.ReductionOps); 6841 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6842 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6843 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6844 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6845 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6846 TaskRedInput.getPointer(), Idxs, 6847 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6848 ".rd_input.gep."); 6849 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6850 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6851 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6852 RCG.emitSharedOrigLValue(CGF, Cnt); 6853 llvm::Value *CastedShared = 6854 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6855 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6856 // ElemLVal.reduce_orig = &Origs[Cnt]; 6857 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); 6858 llvm::Value *CastedOrig = 6859 
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values, and the functions read them from there.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit the threadprivate global variable if the size of the type is
  // non-constant (Sizes.second != nullptr in that case).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data),
          Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc),
                           getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
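    // As a rough sketch (illustrative, not the verbatim output), on the host
    //   #pragma omp taskwait
    // lowers to something like:
    //   %gtid = call i32 @__kmpc_global_thread_num(%struct.ident_t* @loc)
    //   call i32 @__kmpc_omp_taskwait(%struct.ident_t* @loc, i32 %gtid)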
6994 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6995 } 6996 6997 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6998 Region->emitUntiedSwitch(CGF); 6999 } 7000 7001 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 7002 OpenMPDirectiveKind InnerKind, 7003 const RegionCodeGenTy &CodeGen, 7004 bool HasCancel) { 7005 if (!CGF.HaveInsertPoint()) 7006 return; 7007 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 7008 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 7009 } 7010 7011 namespace { 7012 enum RTCancelKind { 7013 CancelNoreq = 0, 7014 CancelParallel = 1, 7015 CancelLoop = 2, 7016 CancelSections = 3, 7017 CancelTaskgroup = 4 7018 }; 7019 } // anonymous namespace 7020 7021 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 7022 RTCancelKind CancelKind = CancelNoreq; 7023 if (CancelRegion == OMPD_parallel) 7024 CancelKind = CancelParallel; 7025 else if (CancelRegion == OMPD_for) 7026 CancelKind = CancelLoop; 7027 else if (CancelRegion == OMPD_sections) 7028 CancelKind = CancelSections; 7029 else { 7030 assert(CancelRegion == OMPD_taskgroup); 7031 CancelKind = CancelTaskgroup; 7032 } 7033 return CancelKind; 7034 } 7035 7036 void CGOpenMPRuntime::emitCancellationPointCall( 7037 CodeGenFunction &CGF, SourceLocation Loc, 7038 OpenMPDirectiveKind CancelRegion) { 7039 if (!CGF.HaveInsertPoint()) 7040 return; 7041 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 7042 // global_tid, kmp_int32 cncl_kind); 7043 if (auto *OMPRegionInfo = 7044 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 7045 // For 'cancellation point taskgroup', the task region info may not have a 7046 // cancel. This may instead happen in another adjacent task. 7047 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 7048 llvm::Value *Args[] = { 7049 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 7050 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 7051 // Ignore return result until untied tasks are supported. 
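      // Sketch of the control flow emitted below (names illustrative):
      //   %res = call i32 @__kmpc_cancellationpoint(ident_t* @loc, i32 %gtid,
      //                                             i32 <cncl_kind>)
      //   %cmp = icmp ne i32 %res, 0
      //   br i1 %cmp, label %.cancel.exit, label %.cancel.continue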
7052 llvm::Value *Result = CGF.EmitRuntimeCall( 7053 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 7054 // if (__kmpc_cancellationpoint()) { 7055 // exit from construct; 7056 // } 7057 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 7058 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 7059 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 7060 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 7061 CGF.EmitBlock(ExitBB); 7062 // exit from construct; 7063 CodeGenFunction::JumpDest CancelDest = 7064 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 7065 CGF.EmitBranchThroughCleanup(CancelDest); 7066 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 7067 } 7068 } 7069 } 7070 7071 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 7072 const Expr *IfCond, 7073 OpenMPDirectiveKind CancelRegion) { 7074 if (!CGF.HaveInsertPoint()) 7075 return; 7076 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 7077 // kmp_int32 cncl_kind); 7078 if (auto *OMPRegionInfo = 7079 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 7080 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 7081 PrePostActionTy &) { 7082 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 7083 llvm::Value *Args[] = { 7084 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 7085 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 7086 // Ignore return result until untied tasks are supported. 7087 llvm::Value *Result = CGF.EmitRuntimeCall( 7088 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 7089 // if (__kmpc_cancel()) { 7090 // exit from construct; 7091 // } 7092 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 7093 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 7094 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 7095 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 7096 CGF.EmitBlock(ExitBB); 7097 // exit from construct; 7098 CodeGenFunction::JumpDest CancelDest = 7099 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 7100 CGF.EmitBranchThroughCleanup(CancelDest); 7101 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 7102 }; 7103 if (IfCond) { 7104 emitIfClause(CGF, IfCond, ThenGen, 7105 [](CodeGenFunction &, PrePostActionTy &) {}); 7106 } else { 7107 RegionCodeGenTy ThenRCG(ThenGen); 7108 ThenRCG(CGF); 7109 } 7110 } 7111 } 7112 7113 namespace { 7114 /// Cleanup action for uses_allocators support. 
7115 class OMPUsesAllocatorsActionTy final : public PrePostActionTy { 7116 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators; 7117 7118 public: 7119 OMPUsesAllocatorsActionTy( 7120 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators) 7121 : Allocators(Allocators) {} 7122 void Enter(CodeGenFunction &CGF) override { 7123 if (!CGF.HaveInsertPoint()) 7124 return; 7125 for (const auto &AllocatorData : Allocators) { 7126 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit( 7127 CGF, AllocatorData.first, AllocatorData.second); 7128 } 7129 } 7130 void Exit(CodeGenFunction &CGF) override { 7131 if (!CGF.HaveInsertPoint()) 7132 return; 7133 for (const auto &AllocatorData : Allocators) { 7134 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF, 7135 AllocatorData.first); 7136 } 7137 } 7138 }; 7139 } // namespace 7140 7141 void CGOpenMPRuntime::emitTargetOutlinedFunction( 7142 const OMPExecutableDirective &D, StringRef ParentName, 7143 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 7144 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 7145 assert(!ParentName.empty() && "Invalid target region parent name!"); 7146 HasEmittedTargetRegion = true; 7147 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; 7148 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { 7149 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 7150 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 7151 if (!D.AllocatorTraits) 7152 continue; 7153 Allocators.emplace_back(D.Allocator, D.AllocatorTraits); 7154 } 7155 } 7156 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators); 7157 CodeGen.setAction(UsesAllocatorAction); 7158 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 7159 IsOffloadEntry, CodeGen); 7160 } 7161 7162 void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, 7163 const Expr *Allocator, 7164 const Expr *AllocatorTraits) { 7165 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc()); 7166 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true); 7167 // Use default memspace handle. 7168 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 7169 llvm::Value *NumTraits = llvm::ConstantInt::get( 7170 CGF.IntTy, cast<ConstantArrayType>( 7171 AllocatorTraits->getType()->getAsArrayTypeUnsafe()) 7172 ->getSize() 7173 .getLimitedValue()); 7174 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); 7175 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 7176 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); 7177 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, 7178 AllocatorTraitsLVal.getBaseInfo(), 7179 AllocatorTraitsLVal.getTBAAInfo()); 7180 llvm::Value *Traits = 7181 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); 7182 7183 llvm::Value *AllocatorVal = 7184 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_init_allocator), 7185 {ThreadId, MemSpaceHandle, NumTraits, Traits}); 7186 // Store to allocator. 
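  // The allocator expression names the variable introduced by the
  // uses_allocators clause; emit its declaration first, then store the
  // handle returned by __kmpc_init_allocator into it (converted from void*
  // to the representation the allocator variable's type expects).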
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something
  // like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is
  // the mangled name of the function that encloses the target region and BB
  // is the line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this outlined function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the
  // current target region, so it only has to be unique and not necessarily
  // point to anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so the compiler
  // does not need to keep it alive and can therefore inline the host function
  // if that proves worthwhile during optimization. On the other hand, if
  // emitting code for the device, the ID has to be the function address so
  // that it can be retrieved from the offloading entry and launched by the
  // runtime library. We also mark the outlined function to have external
  // linkage in case we are emitting code for the device, because these
  // functions will be entry points to the device.
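  // For instance, for a host-compiled target region in function 'foo' the
  // emitted ID is roughly (a sketch; the exact name comes from
  // getTargetEntryUniqueInfo above):
  //   @.__omp_offloading_<dev>_<file>_foo_l<line>.region_id =
  //       weak constant i8 0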
7259 7260 if (CGM.getLangOpts().OpenMPIsDevice) { 7261 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 7262 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 7263 OutlinedFn->setDSOLocal(false); 7264 } else { 7265 std::string Name = getName({EntryFnName, "region_id"}); 7266 OutlinedFnID = new llvm::GlobalVariable( 7267 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 7268 llvm::GlobalValue::WeakAnyLinkage, 7269 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 7270 } 7271 7272 // Register the information for the entry associated with this target region. 7273 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 7274 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 7275 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 7276 } 7277 7278 /// Checks if the expression is constant or does not have non-trivial function 7279 /// calls. 7280 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 7281 // We can skip constant expressions. 7282 // We can skip expressions with trivial calls or simple expressions. 7283 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 7284 !E->hasNonTrivialCall(Ctx)) && 7285 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 7286 } 7287 7288 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 7289 const Stmt *Body) { 7290 const Stmt *Child = Body->IgnoreContainers(); 7291 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 7292 Child = nullptr; 7293 for (const Stmt *S : C->body()) { 7294 if (const auto *E = dyn_cast<Expr>(S)) { 7295 if (isTrivial(Ctx, E)) 7296 continue; 7297 } 7298 // Some of the statements can be ignored. 7299 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 7300 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 7301 continue; 7302 // Analyze declarations. 7303 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 7304 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 7305 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 7306 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 7307 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 7308 isa<UsingDirectiveDecl>(D) || 7309 isa<OMPDeclareReductionDecl>(D) || 7310 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 7311 return true; 7312 const auto *VD = dyn_cast<VarDecl>(D); 7313 if (!VD) 7314 return false; 7315 return VD->isConstexpr() || 7316 ((VD->getType().isTrivialType(Ctx) || 7317 VD->getType()->isReferenceType()) && 7318 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 7319 })) 7320 continue; 7321 } 7322 // Found multiple children - cannot get the one child only. 7323 if (Child) 7324 return nullptr; 7325 Child = S; 7326 } 7327 if (Child) 7328 Child = Child->IgnoreContainers(); 7329 } 7330 return Child; 7331 } 7332 7333 /// Emit the number of teams for a target directive. Inspect the num_teams 7334 /// clause associated with a teams construct combined or closely nested 7335 /// with the target directive. 7336 /// 7337 /// Emit a team of size one for directives such as 'target parallel' that 7338 /// have no associated teams construct. 7339 /// 7340 /// Otherwise, return nullptr. 
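/// As a rough guide (a sketch, not an exhaustive list):
///   #pragma omp target teams num_teams(8)  // emits the value 8
///   #pragma omp target parallel            // emits 1 (a single team)
///   #pragma omp target                     // nullptr when the nested
///                                          // construct is not known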
7341 static llvm::Value * 7342 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 7343 const OMPExecutableDirective &D) { 7344 assert(!CGF.getLangOpts().OpenMPIsDevice && 7345 "Clauses associated with the teams directive expected to be emitted " 7346 "only for the host!"); 7347 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7348 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7349 "Expected target-based executable directive."); 7350 CGBuilderTy &Bld = CGF.Builder; 7351 switch (DirectiveKind) { 7352 case OMPD_target: { 7353 const auto *CS = D.getInnermostCapturedStmt(); 7354 const auto *Body = 7355 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 7356 const Stmt *ChildStmt = 7357 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 7358 if (const auto *NestedDir = 7359 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 7360 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 7361 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 7362 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7363 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7364 const Expr *NumTeams = 7365 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 7366 llvm::Value *NumTeamsVal = 7367 CGF.EmitScalarExpr(NumTeams, 7368 /*IgnoreResultAssign*/ true); 7369 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 7370 /*isSigned=*/true); 7371 } 7372 return Bld.getInt32(0); 7373 } 7374 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 7375 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 7376 return Bld.getInt32(1); 7377 return Bld.getInt32(0); 7378 } 7379 return nullptr; 7380 } 7381 case OMPD_target_teams: 7382 case OMPD_target_teams_distribute: 7383 case OMPD_target_teams_distribute_simd: 7384 case OMPD_target_teams_distribute_parallel_for: 7385 case OMPD_target_teams_distribute_parallel_for_simd: { 7386 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 7387 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); 7388 const Expr *NumTeams = 7389 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 7390 llvm::Value *NumTeamsVal = 7391 CGF.EmitScalarExpr(NumTeams, 7392 /*IgnoreResultAssign*/ true); 7393 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 7394 /*isSigned=*/true); 7395 } 7396 return Bld.getInt32(0); 7397 } 7398 case OMPD_target_parallel: 7399 case OMPD_target_parallel_for: 7400 case OMPD_target_parallel_for_simd: 7401 case OMPD_target_simd: 7402 return Bld.getInt32(1); 7403 case OMPD_parallel: 7404 case OMPD_for: 7405 case OMPD_parallel_for: 7406 case OMPD_parallel_master: 7407 case OMPD_parallel_sections: 7408 case OMPD_for_simd: 7409 case OMPD_parallel_for_simd: 7410 case OMPD_cancel: 7411 case OMPD_cancellation_point: 7412 case OMPD_ordered: 7413 case OMPD_threadprivate: 7414 case OMPD_allocate: 7415 case OMPD_task: 7416 case OMPD_simd: 7417 case OMPD_sections: 7418 case OMPD_section: 7419 case OMPD_single: 7420 case OMPD_master: 7421 case OMPD_critical: 7422 case OMPD_taskyield: 7423 case OMPD_barrier: 7424 case OMPD_taskwait: 7425 case OMPD_taskgroup: 7426 case OMPD_atomic: 7427 case OMPD_flush: 7428 case OMPD_depobj: 7429 case OMPD_scan: 7430 case OMPD_teams: 7431 case OMPD_target_data: 7432 case OMPD_target_exit_data: 7433 case OMPD_target_enter_data: 7434 case OMPD_distribute: 7435 case OMPD_distribute_simd: 7436 case OMPD_distribute_parallel_for: 7437 case OMPD_distribute_parallel_for_simd: 7438 case OMPD_teams_distribute: 7439 case OMPD_teams_distribute_simd: 7440 case OMPD_teams_distribute_parallel_for: 
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle the if clause. If it is present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause if the if clause was not
      // specified or did not evaluate to false.
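      // Conceptually, the value computed below is:
      //   <cond> ? (<numthreads> ? min(<numthreads>, <default_limit>)
      //                          : (<default_limit> ? <default_limit> : 0))
      //          : 1
      // where 0 leaves the choice of the number of threads to the runtime.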
7511 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 7512 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7513 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7514 const auto *NumThreadsClause = 7515 Dir->getSingleClause<OMPNumThreadsClause>(); 7516 CodeGenFunction::LexicalScope Scope( 7517 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 7518 if (const auto *PreInit = 7519 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 7520 for (const auto *I : PreInit->decls()) { 7521 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7522 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7523 } else { 7524 CodeGenFunction::AutoVarEmission Emission = 7525 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7526 CGF.EmitAutoVarCleanups(Emission); 7527 } 7528 } 7529 } 7530 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 7531 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 7532 /*isSigned=*/false); 7533 if (DefaultThreadLimitVal) 7534 NumThreads = CGF.Builder.CreateSelect( 7535 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 7536 DefaultThreadLimitVal, NumThreads); 7537 } else { 7538 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 7539 : CGF.Builder.getInt32(0); 7540 } 7541 // Process condition of the if clause. 7542 if (CondVal) { 7543 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 7544 CGF.Builder.getInt32(1)); 7545 } 7546 return NumThreads; 7547 } 7548 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 7549 return CGF.Builder.getInt32(1); 7550 return DefaultThreadLimitVal; 7551 } 7552 return DefaultThreadLimitVal ? DefaultThreadLimitVal 7553 : CGF.Builder.getInt32(0); 7554 } 7555 7556 /// Emit the number of threads for a target directive. Inspect the 7557 /// thread_limit clause associated with a teams construct combined or closely 7558 /// nested with the target directive. 7559 /// 7560 /// Emit the num_threads clause for directives such as 'target parallel' that 7561 /// have no associated teams construct. 7562 /// 7563 /// Otherwise, return nullptr. 
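/// As a rough guide (a sketch, not an exhaustive list):
///   #pragma omp target parallel num_threads(4)  // emits the value 4
///   #pragma omp target teams thread_limit(16)   // emits the value 16
///   #pragma omp target simd                     // emits 1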
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If it is present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
7667 if (D.hasClausesOfKind<OMPIfClause>()) { 7668 const OMPIfClause *IfClause = nullptr; 7669 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7670 if (C->getNameModifier() == OMPD_unknown || 7671 C->getNameModifier() == OMPD_parallel) { 7672 IfClause = C; 7673 break; 7674 } 7675 } 7676 if (IfClause) { 7677 const Expr *Cond = IfClause->getCondition(); 7678 bool Result; 7679 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7680 if (!Result) 7681 return Bld.getInt32(1); 7682 } else { 7683 CodeGenFunction::RunCleanupsScope Scope(CGF); 7684 CondVal = CGF.EvaluateExprAsBool(Cond); 7685 } 7686 } 7687 } 7688 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7689 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7690 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7691 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7692 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7693 ThreadLimitVal = 7694 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7695 } 7696 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7697 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7698 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7699 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7700 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7701 NumThreadsVal = 7702 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7703 ThreadLimitVal = ThreadLimitVal 7704 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7705 ThreadLimitVal), 7706 NumThreadsVal, ThreadLimitVal) 7707 : NumThreadsVal; 7708 } 7709 if (!ThreadLimitVal) 7710 ThreadLimitVal = Bld.getInt32(0); 7711 if (CondVal) 7712 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7713 return ThreadLimitVal; 7714 } 7715 case OMPD_target_teams_distribute_simd: 7716 case OMPD_target_simd: 7717 return Bld.getInt32(1); 7718 case OMPD_parallel: 7719 case OMPD_for: 7720 case OMPD_parallel_for: 7721 case OMPD_parallel_master: 7722 case OMPD_parallel_sections: 7723 case OMPD_for_simd: 7724 case OMPD_parallel_for_simd: 7725 case OMPD_cancel: 7726 case OMPD_cancellation_point: 7727 case OMPD_ordered: 7728 case OMPD_threadprivate: 7729 case OMPD_allocate: 7730 case OMPD_task: 7731 case OMPD_simd: 7732 case OMPD_sections: 7733 case OMPD_section: 7734 case OMPD_single: 7735 case OMPD_master: 7736 case OMPD_critical: 7737 case OMPD_taskyield: 7738 case OMPD_barrier: 7739 case OMPD_taskwait: 7740 case OMPD_taskgroup: 7741 case OMPD_atomic: 7742 case OMPD_flush: 7743 case OMPD_depobj: 7744 case OMPD_scan: 7745 case OMPD_teams: 7746 case OMPD_target_data: 7747 case OMPD_target_exit_data: 7748 case OMPD_target_enter_data: 7749 case OMPD_distribute: 7750 case OMPD_distribute_simd: 7751 case OMPD_distribute_parallel_for: 7752 case OMPD_distribute_parallel_for_simd: 7753 case OMPD_teams_distribute: 7754 case OMPD_teams_distribute_simd: 7755 case OMPD_teams_distribute_parallel_for: 7756 case OMPD_teams_distribute_parallel_for_simd: 7757 case OMPD_target_update: 7758 case OMPD_declare_simd: 7759 case OMPD_declare_variant: 7760 case OMPD_begin_declare_variant: 7761 case OMPD_end_declare_variant: 7762 case OMPD_declare_target: 7763 case OMPD_end_declare_target: 7764 case OMPD_declare_reduction: 7765 case OMPD_declare_mapper: 7766 case OMPD_taskloop: 7767 case OMPD_taskloop_simd: 7768 case OMPD_master_taskloop: 7769 case OMPD_master_taskloop_simd: 7770 case OMPD_parallel_master_taskloop: 7771 case OMPD_parallel_master_taskloop_simd: 
7772 case OMPD_requires: 7773 case OMPD_unknown: 7774 break; 7775 } 7776 llvm_unreachable("Unsupported directive kind."); 7777 } 7778 7779 namespace { 7780 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7781 7782 // Utility to handle information from clauses associated with a given 7783 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7784 // It provides a convenient interface to obtain the information and generate 7785 // code for that information. 7786 class MappableExprsHandler { 7787 public: 7788 /// Values for bit flags used to specify the mapping type for 7789 /// offloading. 7790 enum OpenMPOffloadMappingFlags : uint64_t { 7791 /// No flags 7792 OMP_MAP_NONE = 0x0, 7793 /// Allocate memory on the device and move data from host to device. 7794 OMP_MAP_TO = 0x01, 7795 /// Allocate memory on the device and move data from device to host. 7796 OMP_MAP_FROM = 0x02, 7797 /// Always perform the requested mapping action on the element, even 7798 /// if it was already mapped before. 7799 OMP_MAP_ALWAYS = 0x04, 7800 /// Delete the element from the device environment, ignoring the 7801 /// current reference count associated with the element. 7802 OMP_MAP_DELETE = 0x08, 7803 /// The element being mapped is a pointer-pointee pair; both the 7804 /// pointer and the pointee should be mapped. 7805 OMP_MAP_PTR_AND_OBJ = 0x10, 7806 /// This flags signals that the base address of an entry should be 7807 /// passed to the target kernel as an argument. 7808 OMP_MAP_TARGET_PARAM = 0x20, 7809 /// Signal that the runtime library has to return the device pointer 7810 /// in the current position for the data being mapped. Used when we have the 7811 /// use_device_ptr clause. 7812 OMP_MAP_RETURN_PARAM = 0x40, 7813 /// This flag signals that the reference being passed is a pointer to 7814 /// private data. 7815 OMP_MAP_PRIVATE = 0x80, 7816 /// Pass the element to the device by value. 7817 OMP_MAP_LITERAL = 0x100, 7818 /// Implicit map 7819 OMP_MAP_IMPLICIT = 0x200, 7820 /// Close is a hint to the runtime to allocate memory close to 7821 /// the target device. 7822 OMP_MAP_CLOSE = 0x400, 7823 /// The 16 MSBs of the flags indicate whether the entry is member of some 7824 /// struct/class. 7825 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7826 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7827 }; 7828 7829 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7830 static unsigned getFlagMemberOffset() { 7831 unsigned Offset = 0; 7832 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7833 Remain = Remain >> 1) 7834 Offset++; 7835 return Offset; 7836 } 7837 7838 /// Class that associates information with a base pointer to be passed to the 7839 /// runtime library. 7840 class BasePointerInfo { 7841 /// The base pointer. 7842 llvm::Value *Ptr = nullptr; 7843 /// The base declaration that refers to this device pointer, or null if 7844 /// there is none. 
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Information gathered from a map-like clause for a single mappable
  /// expression, including whether a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and
  /// there is no map information about it, then emission of that entry is
  /// deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// The bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                  4>>
      DevPointersMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for an array shaping expression.
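    // E.g. for a shaping expression '([n][m])ptr' the size computed below is
    // sizeof(*ptr) * n * m, with every dimension first converted to the
    // target's size_t.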
7926 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7927 llvm::Value *Size = 7928 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7929 for (const Expr *SE : OAE->getDimensions()) { 7930 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7931 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7932 CGF.getContext().getSizeType(), 7933 SE->getExprLoc()); 7934 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7935 } 7936 return Size; 7937 } 7938 7939 // Reference types are ignored for mapping purposes. 7940 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7941 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7942 7943 // Given that an array section is considered a built-in type, we need to 7944 // do the calculation based on the length of the section instead of relying 7945 // on CGF.getTypeSize(E->getType()). 7946 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7947 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7948 OAE->getBase()->IgnoreParenImpCasts()) 7949 .getCanonicalType(); 7950 7951 // If there is no length associated with the expression and lower bound is 7952 // not specified too, that means we are using the whole length of the 7953 // base. 7954 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7955 !OAE->getLowerBound()) 7956 return CGF.getTypeSize(BaseTy); 7957 7958 llvm::Value *ElemSize; 7959 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7960 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7961 } else { 7962 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7963 assert(ATy && "Expecting array type if not a pointer type."); 7964 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7965 } 7966 7967 // If we don't have a length at this point, that is because we have an 7968 // array section with a single element. 7969 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7970 return ElemSize; 7971 7972 if (const Expr *LenExpr = OAE->getLength()) { 7973 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7974 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7975 CGF.getContext().getSizeType(), 7976 LenExpr->getExprLoc()); 7977 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7978 } 7979 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7980 OAE->getLowerBound() && "expected array_section[lb:]."); 7981 // Size = sizetype - lb * elemtype; 7982 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7983 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7984 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7985 CGF.getContext().getSizeType(), 7986 OAE->getLowerBound()->getExprLoc()); 7987 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7988 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7989 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7990 LengthVal = CGF.Builder.CreateSelect( 7991 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7992 return LengthVal; 7993 } 7994 return CGF.getTypeSize(ExprTy); 7995 } 7996 7997 /// Return the corresponding bits for a given map clause modifier. Add 7998 /// a flag marking the map as a pointer if requested. Add a flag marking the 7999 /// map as the first one of a series of maps that relate to the same map 8000 /// expression. 
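  /// For instance, 'map(always, tofrom: x)' yields
  /// OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS, to which OMP_MAP_PTR_AND_OBJ
  /// and/or OMP_MAP_TARGET_PARAM are added when requested by the caller.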
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) !=
        MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) !=
        MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
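  /// For example, with 'struct S { int x; } s;', 'map(tofrom: s.x)' produces
  /// a single entry (&s, &s.x, sizeof(int), TARGET_PARAM | TO | FROM); richer
  /// cases are enumerated in the comment at the top of the function body.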
8085 void generateInfoForComponentList( 8086 OpenMPMapClauseKind MapType, 8087 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8088 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 8089 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8090 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8091 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 8092 bool IsImplicit, 8093 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8094 OverlappedElements = llvm::None) const { 8095 // The following summarizes what has to be generated for each map and the 8096 // types below. The generated information is expressed in this order: 8097 // base pointer, section pointer, size, flags 8098 // (to add to the ones that come from the map type and modifier). 8099 // 8100 // double d; 8101 // int i[100]; 8102 // float *p; 8103 // 8104 // struct S1 { 8105 // int i; 8106 // float f[50]; 8107 // } 8108 // struct S2 { 8109 // int i; 8110 // float f[50]; 8111 // S1 s; 8112 // double *p; 8113 // struct S2 *ps; 8114 // } 8115 // S2 s; 8116 // S2 *ps; 8117 // 8118 // map(d) 8119 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 8120 // 8121 // map(i) 8122 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 8123 // 8124 // map(i[1:23]) 8125 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 8126 // 8127 // map(p) 8128 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 8129 // 8130 // map(p[1:24]) 8131 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 8132 // 8133 // map(s) 8134 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 8135 // 8136 // map(s.i) 8137 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 8138 // 8139 // map(s.s.f) 8140 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 8141 // 8142 // map(s.p) 8143 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 8144 // 8145 // map(to: s.p[:22]) 8146 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 8147 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 8148 // &(s.p), &(s.p[0]), 22*sizeof(double), 8149 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 8150 // (*) alloc space for struct members, only this is a target parameter 8151 // (**) map the pointer (nothing to be mapped in this example) (the compiler 8152 // optimizes this entry out, same in the examples below) 8153 // (***) map the pointee (map: to) 8154 // 8155 // map(s.ps) 8156 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 8157 // 8158 // map(from: s.ps->s.i) 8159 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 8160 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 8161 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 8162 // 8163 // map(to: s.ps->ps) 8164 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 8165 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 8166 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 8167 // 8168 // map(s.ps->ps->ps) 8169 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 8170 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 8171 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 8172 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 8173 // 8174 // map(to: s.ps->ps->s.f[:22]) 8175 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 8176 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 8177 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 8178 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 8179 // 8180 // map(ps) 8181 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 8182 // 8183 // map(ps->i) 8184 // ps, &(ps->i), 
sizeof(int), TARGET_PARAM | TO | FROM 8185 // 8186 // map(ps->s.f) 8187 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 8188 // 8189 // map(from: ps->p) 8190 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 8191 // 8192 // map(to: ps->p[:22]) 8193 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 8194 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 8195 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 8196 // 8197 // map(ps->ps) 8198 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 8199 // 8200 // map(from: ps->ps->s.i) 8201 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 8202 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 8203 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 8204 // 8205 // map(from: ps->ps->ps) 8206 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 8207 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 8208 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 8209 // 8210 // map(ps->ps->ps->ps) 8211 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 8212 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 8213 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 8214 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 8215 // 8216 // map(to: ps->ps->ps->s.f[:22]) 8217 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 8218 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 8219 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 8220 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 8221 // 8222 // map(to: s.f[:22]) map(from: s.p[:33]) 8223 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 8224 // sizeof(double*) (**), TARGET_PARAM 8225 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 8226 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 8227 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 8228 // (*) allocate contiguous space needed to fit all mapped members even if 8229 // we allocate space for members not mapped (in this example, 8230 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 8231 // them as well because they fall between &s.f[0] and &s.p) 8232 // 8233 // map(from: s.f[:22]) map(to: ps->p[:33]) 8234 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 8235 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 8236 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 8237 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 8238 // (*) the struct this entry pertains to is the 2nd element in the list of 8239 // arguments, hence MEMBER_OF(2) 8240 // 8241 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 8242 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 8243 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 8244 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 8245 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 8246 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 8247 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 8248 // (*) the struct this entry pertains to is the 4th element in the list 8249 // of arguments, hence MEMBER_OF(4) 8250 8251 // Track if the map information being generated is the first for a capture. 8252 bool IsCaptureFirstInfo = IsFirstComponentList; 8253 // When the variable is on a declare target link or in a to clause with 8254 // unified memory, a reference is needed to hold the host/device address 8255 // of the variable. 
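    // E.g. for '#pragma omp declare target link(gbl)' the mapping below goes
    // through the compiler-generated reference for 'gbl' (see
    // getAddrOfDeclareTargetVar), not through 'gbl' itself.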
8256 bool RequiresReference = false; 8257 8258 // Scan the components from the base to the complete expression. 8259 auto CI = Components.rbegin(); 8260 auto CE = Components.rend(); 8261 auto I = CI; 8262 8263 // Track if the map information being generated is the first for a list of 8264 // components. 8265 bool IsExpressionFirstInfo = true; 8266 Address BP = Address::invalid(); 8267 const Expr *AssocExpr = I->getAssociatedExpression(); 8268 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 8269 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 8270 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 8271 8272 if (isa<MemberExpr>(AssocExpr)) { 8273 // The base is the 'this' pointer. The content of the pointer is going 8274 // to be the base of the field being mapped. 8275 BP = CGF.LoadCXXThisAddress(); 8276 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 8277 (OASE && 8278 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 8279 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 8280 } else if (OAShE && 8281 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 8282 BP = Address( 8283 CGF.EmitScalarExpr(OAShE->getBase()), 8284 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 8285 } else { 8286 // The base is the reference to the variable. 8287 // BP = &Var. 8288 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 8289 if (const auto *VD = 8290 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 8291 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8292 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 8293 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 8294 (*Res == OMPDeclareTargetDeclAttr::MT_To && 8295 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 8296 RequiresReference = true; 8297 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 8298 } 8299 } 8300 } 8301 8302 // If the variable is a pointer and is being dereferenced (i.e. is not 8303 // the last component), the base has to be the pointer itself, not its 8304 // reference. References are ignored for mapping purposes. 8305 QualType Ty = 8306 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 8307 if (Ty->isAnyPointerType() && std::next(I) != CE) { 8308 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 8309 8310 // We do not need to generate individual map information for the 8311 // pointer, it can be associated with the combined storage. 8312 ++I; 8313 } 8314 } 8315 8316 // Track whether a component of the list should be marked as MEMBER_OF some 8317 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 8318 // in a component list should be marked as MEMBER_OF, all subsequent entries 8319 // do not belong to the base struct. E.g. 8320 // struct S2 s; 8321 // s.ps->ps->ps->f[:] 8322 // (1) (2) (3) (4) 8323 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 8324 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 8325 // is the pointee of ps(2) which is not member of struct s, so it should not 8326 // be marked as such (it is still PTR_AND_OBJ). 8327 // The variable is initialized to false so that PTR_AND_OBJ entries which 8328 // are not struct members are not considered (e.g. array of pointers to 8329 // data). 
8330     bool ShouldBeMemberOf = false;
8331
8332     // Variable keeping track of whether or not we have encountered a component
8333     // in the component list which is a member expression. Useful when we have a
8334     // pointer or a final array section, in which case it is the previous
8335     // component in the list which tells us whether we have a member expression.
8336     // E.g. X.f[:]
8337     // While processing the final array section "[:]" it is "f" which tells us
8338     // whether we are dealing with a member of a declared struct.
8339     const MemberExpr *EncounteredME = nullptr;
8340
8341     for (; I != CE; ++I) {
8342       // If the current component is a member of a struct (parent struct), mark it.
8343       if (!EncounteredME) {
8344         EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
8345         // If we encounter a PTR_AND_OBJ entry from now on it should be marked
8346         // as MEMBER_OF the parent struct.
8347         if (EncounteredME)
8348           ShouldBeMemberOf = true;
8349       }
8350
8351       auto Next = std::next(I);
8352
8353       // We need to generate the addresses and sizes if this is the last
8354       // component, if the component is a pointer or if it is an array section
8355       // whose length can't be proved to be one. If this is a pointer, it
8356       // becomes the base address for the following components.
8357
8358       // A final array section is one whose length can't be proved to be one.
8359       bool IsFinalArraySection =
8360           isFinalArraySectionExpression(I->getAssociatedExpression());
8361
8362       // Get information on whether the element is a pointer. Array sections
8363       // need special treatment given that they are built-in
8364       // types.
8365       const auto *OASE =
8366           dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
8367       const auto *OAShE =
8368           dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
8369       const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8370       const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8371       bool IsPointer =
8372           OAShE ||
8373           (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8374                        .getCanonicalType()
8375                        ->isAnyPointerType()) ||
8376           I->getAssociatedExpression()->getType()->isAnyPointerType();
8377       bool IsNonDerefPointer = IsPointer && !UO && !BO;
8378
8379       if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
8380         // If this is not the last component, we expect the pointer to be
8381         // associated with an array expression or member expression.
8382         assert((Next == CE ||
8383                 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8384                 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8385                 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8386                 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8387                 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8388                "Unexpected expression");
8389
8390         Address LB = Address::invalid();
8391         if (OAShE) {
8392           LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8393                        CGF.getContext().getTypeAlignInChars(
8394                            OAShE->getBase()->getType()));
8395         } else {
8396           LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8397                    .getAddress(CGF);
8398         }
8399
8400         // If this component is a pointer inside the base struct then we don't
8401         // need to create any entry for it - it will be combined with the object
8402         // it is pointing to into a single PTR_AND_OBJ entry.
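        // E.g. in the map(to: ps->p[:22]) example above, the component for "p"
        // itself produces no standalone entry; only the combined
        // &(ps->p) -> &(ps->p[0]) PTR_AND_OBJ entry is emitted.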
8403         bool IsMemberPointer =
8404             IsPointer && EncounteredME &&
8405             (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
8406              EncounteredME);
8407         if (!OverlappedElements.empty()) {
8408           // Handle base element with the info for overlapped elements.
8409           assert(!PartialStruct.Base.isValid() && "The base element is set.");
8410           assert(Next == CE &&
8411                  "Expected last element for the overlapped elements.");
8412           assert(!IsPointer &&
8413                  "Unexpected base element with the pointer type.");
8414           // Mark the whole struct as the struct that requires allocation on the
8415           // device.
8416           PartialStruct.LowestElem = {0, LB};
8417           CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8418               I->getAssociatedExpression()->getType());
8419           Address HB = CGF.Builder.CreateConstGEP(
8420               CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
8421                                                               CGF.VoidPtrTy),
8422               TypeSize.getQuantity() - 1);
8423           PartialStruct.HighestElem = {
8424               std::numeric_limits<decltype(
8425                   PartialStruct.HighestElem.first)>::max(),
8426               HB};
8427           PartialStruct.Base = BP;
8428           // Emit entries for the non-overlapped data.
8429           OpenMPOffloadMappingFlags Flags =
8430               OMP_MAP_MEMBER_OF |
8431               getMapTypeBits(MapType, MapModifiers, IsImplicit,
8432                              /*AddPtrFlag=*/false,
8433                              /*AddIsTargetParamFlag=*/false);
8434           LB = BP;
8435           llvm::Value *Size = nullptr;
8436           // Do a bitcopy of all non-overlapped structure elements.
8437           for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8438                    Component : OverlappedElements) {
8439             Address ComponentLB = Address::invalid();
8440             for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8441                  Component) {
8442               if (MC.getAssociatedDeclaration()) {
8443                 ComponentLB =
8444                     CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8445                         .getAddress(CGF);
8446                 Size = CGF.Builder.CreatePtrDiff(
8447                     CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8448                     CGF.EmitCastToVoidPtr(LB.getPointer()));
8449                 break;
8450               }
8451             }
8452             BasePointers.push_back(BP.getPointer());
8453             Pointers.push_back(LB.getPointer());
8454             Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
8455                                                       /*isSigned=*/true));
8456             Types.push_back(Flags);
8457             LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8458           }
8459           BasePointers.push_back(BP.getPointer());
8460           Pointers.push_back(LB.getPointer());
8461           Size = CGF.Builder.CreatePtrDiff(
8462               CGF.EmitCastToVoidPtr(
8463                   CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
8464               CGF.EmitCastToVoidPtr(LB.getPointer()));
8465           Sizes.push_back(
8466               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8467           Types.push_back(Flags);
8468           break;
8469         }
8470         llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8471         if (!IsMemberPointer) {
8472           BasePointers.push_back(BP.getPointer());
8473           Pointers.push_back(LB.getPointer());
8474           Sizes.push_back(
8475               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8476
8477           // We need to add a pointer flag for each map that comes from the
8478           // same expression except for the first one. We also need to signal
8479           // this map is the first one that relates to the current capture
8480           // (there is a set of entries for each capture).
8481           OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8482               MapType, MapModifiers, IsImplicit,
8483               !IsExpressionFirstInfo || RequiresReference,
8484               IsCaptureFirstInfo && !RequiresReference);
8485
8486           if (!IsExpressionFirstInfo) {
8487             // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8488             // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
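            // E.g. in the map(ps->ps->ps->ps) example above, the two
            // intermediate PTR_AND_OBJ entries carry no TO/FROM bits; only the
            // final component keeps the map type requested by the user.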
8489             if (IsPointer)
8490               Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8491                          OMP_MAP_DELETE | OMP_MAP_CLOSE);
8492
8493             if (ShouldBeMemberOf) {
8494               // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8495               // should be later updated with the correct value of MEMBER_OF.
8496               Flags |= OMP_MAP_MEMBER_OF;
8497               // From now on, all subsequent PTR_AND_OBJ entries should not be
8498               // marked as MEMBER_OF.
8499               ShouldBeMemberOf = false;
8500             }
8501           }
8502
8503           Types.push_back(Flags);
8504         }
8505
8506         // If we have encountered a member expression so far, keep track of the
8507         // mapped member. If the parent is "*this", then the value declaration
8508         // is nullptr.
8509         if (EncounteredME) {
8510           const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8511           unsigned FieldIndex = FD->getFieldIndex();
8512
8513           // Update info about the lowest and highest elements for this struct.
8514           if (!PartialStruct.Base.isValid()) {
8515             PartialStruct.LowestElem = {FieldIndex, LB};
8516             PartialStruct.HighestElem = {FieldIndex, LB};
8517             PartialStruct.Base = BP;
8518           } else if (FieldIndex < PartialStruct.LowestElem.first) {
8519             PartialStruct.LowestElem = {FieldIndex, LB};
8520           } else if (FieldIndex > PartialStruct.HighestElem.first) {
8521             PartialStruct.HighestElem = {FieldIndex, LB};
8522           }
8523         }
8524
8525         // If we have a final array section, we are done with this expression.
8526         if (IsFinalArraySection)
8527           break;
8528
8529         // The pointer becomes the base for the next element.
8530         if (Next != CE)
8531           BP = LB;
8532
8533         IsExpressionFirstInfo = false;
8534         IsCaptureFirstInfo = false;
8535       }
8536     }
8537   }
8538
8539   /// Return the adjusted map modifiers if the declaration a capture refers to
8540   /// appears in a first-private clause. This is expected to be used only with
8541   /// directives that start with 'target'.
8542   MappableExprsHandler::OpenMPOffloadMappingFlags
8543   getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8544     assert(Cap.capturesVariable() && "Expected capture by reference only!");
8545
8546     // A first-private variable captured by reference will use only the
8547     // 'private ptr' and 'map to' flags. Return the right flags if the captured
8548     // declaration is known as first-private in this handler.
8549     if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8550       if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
8551           Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
8552         return MappableExprsHandler::OMP_MAP_ALWAYS |
8553                MappableExprsHandler::OMP_MAP_TO;
8554       if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8555         return MappableExprsHandler::OMP_MAP_TO |
8556                MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8557       return MappableExprsHandler::OMP_MAP_PRIVATE |
8558              MappableExprsHandler::OMP_MAP_TO;
8559     }
8560     return MappableExprsHandler::OMP_MAP_TO |
8561            MappableExprsHandler::OMP_MAP_FROM;
8562   }
8563
8564   static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8565     // Shift left by getFlagMemberOffset() bits.
8566     return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8567                                                   << getFlagMemberOffset());
8568   }
8569
8570   static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8571                                      OpenMPOffloadMappingFlags MemberOfFlag) {
8572     // If the entry is PTR_AND_OBJ but has not been marked with the special
8573     // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8574     // marked as MEMBER_OF.
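    // A quick illustration of the encoding (a sketch; it assumes, as the
    // OMP_MAP_MEMBER_OF mask suggests, that the MEMBER_OF field occupies the
    // top 16 bits, i.e. getFlagMemberOffset() == 48):
    // \code
    //   // getMemberOfFlag(0) encodes MEMBER_OF(1):
    //   uint64_t MemberOf1 = (uint64_t)(0 + 1) << 48; // 0x0001000000000000
    //   // The placeholder has all 16 field bits set (MEMBER_OF == 0xFFFF).
    // \endcode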
8575 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8576 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8577 return; 8578 8579 // Reset the placeholder value to prepare the flag for the assignment of the 8580 // proper MEMBER_OF value. 8581 Flags &= ~OMP_MAP_MEMBER_OF; 8582 Flags |= MemberOfFlag; 8583 } 8584 8585 void getPlainLayout(const CXXRecordDecl *RD, 8586 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8587 bool AsBase) const { 8588 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8589 8590 llvm::StructType *St = 8591 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8592 8593 unsigned NumElements = St->getNumElements(); 8594 llvm::SmallVector< 8595 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8596 RecordLayout(NumElements); 8597 8598 // Fill bases. 8599 for (const auto &I : RD->bases()) { 8600 if (I.isVirtual()) 8601 continue; 8602 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8603 // Ignore empty bases. 8604 if (Base->isEmpty() || CGF.getContext() 8605 .getASTRecordLayout(Base) 8606 .getNonVirtualSize() 8607 .isZero()) 8608 continue; 8609 8610 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8611 RecordLayout[FieldIndex] = Base; 8612 } 8613 // Fill in virtual bases. 8614 for (const auto &I : RD->vbases()) { 8615 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8616 // Ignore empty bases. 8617 if (Base->isEmpty()) 8618 continue; 8619 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8620 if (RecordLayout[FieldIndex]) 8621 continue; 8622 RecordLayout[FieldIndex] = Base; 8623 } 8624 // Fill in all the fields. 8625 assert(!RD->isUnion() && "Unexpected union."); 8626 for (const auto *Field : RD->fields()) { 8627 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8628 // will fill in later.) 8629 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8630 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8631 RecordLayout[FieldIndex] = Field; 8632 } 8633 } 8634 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8635 &Data : RecordLayout) { 8636 if (Data.isNull()) 8637 continue; 8638 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8639 getPlainLayout(Base, Layout, /*AsBase=*/true); 8640 else 8641 Layout.push_back(Data.get<const FieldDecl *>()); 8642 } 8643 } 8644 8645 public: 8646 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8647 : CurDir(&Dir), CGF(CGF) { 8648 // Extract firstprivate clause information. 8649 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8650 for (const auto *D : C->varlists()) 8651 FirstPrivateDecls.try_emplace( 8652 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8653 // Extract implicit firstprivates from uses_allocators clauses. 8654 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { 8655 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { 8656 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); 8657 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) 8658 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), 8659 /*Implicit=*/true); 8660 else if (const auto *VD = dyn_cast<VarDecl>( 8661 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) 8662 ->getDecl())) 8663 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); 8664 } 8665 } 8666 // Extract device pointer clause information. 
8667 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8668 for (auto L : C->component_lists()) 8669 DevPointersMap[L.first].push_back(L.second); 8670 } 8671 8672 /// Constructor for the declare mapper directive. 8673 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8674 : CurDir(&Dir), CGF(CGF) {} 8675 8676 /// Generate code for the combined entry if we have a partially mapped struct 8677 /// and take care of the mapping flags of the arguments corresponding to 8678 /// individual struct members. 8679 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 8680 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8681 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 8682 const StructRangeInfoTy &PartialStruct) const { 8683 // Base is the base of the struct 8684 BasePointers.push_back(PartialStruct.Base.getPointer()); 8685 // Pointer is the address of the lowest element 8686 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 8687 Pointers.push_back(LB); 8688 // Size is (addr of {highest+1} element) - (addr of lowest element) 8689 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 8690 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8691 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8692 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8693 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8694 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8695 /*isSigned=*/false); 8696 Sizes.push_back(Size); 8697 // Map type is always TARGET_PARAM 8698 Types.push_back(OMP_MAP_TARGET_PARAM); 8699 // Remove TARGET_PARAM flag from the first element 8700 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8701 8702 // All other current entries will be MEMBER_OF the combined entry 8703 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8704 // 0xFFFF in the MEMBER_OF field). 8705 OpenMPOffloadMappingFlags MemberOfFlag = 8706 getMemberOfFlag(BasePointers.size() - 1); 8707 for (auto &M : CurTypes) 8708 setCorrectMemberOfFlag(M, MemberOfFlag); 8709 } 8710 8711 /// Generate all the base pointers, section pointers, sizes and map 8712 /// types for the extracted mappable expressions. Also, for each item that 8713 /// relates with a device pointer, a pair of the relevant declaration and 8714 /// index where it occurs is appended to the device pointers info array. 8715 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 8716 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8717 MapFlagsArrayTy &Types) const { 8718 // We have to process the component lists that relate with the same 8719 // declaration in a single chunk so that we can generate the map flags 8720 // correctly. Therefore, we organize all lists in a map. 8721 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8722 8723 // Helper function to fill the information map for the different supported 8724 // clauses. 8725 auto &&InfoGen = [&Info]( 8726 const ValueDecl *D, 8727 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8728 OpenMPMapClauseKind MapType, 8729 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8730 bool ReturnDevicePointer, bool IsImplicit) { 8731 const ValueDecl *VD = 8732 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
8733       Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
8734                             IsImplicit);
8735     };
8736
8737     assert(CurDir.is<const OMPExecutableDirective *>() &&
8738            "Expect an executable directive");
8739     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8740     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
8741       for (const auto L : C->component_lists()) {
8742         InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
8743                 /*ReturnDevicePointer=*/false, C->isImplicit());
8744       }
8745     for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
8746       for (const auto L : C->component_lists()) {
8747         InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
8748                 /*ReturnDevicePointer=*/false, C->isImplicit());
8749       }
8750     for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
8751       for (const auto L : C->component_lists()) {
8752         InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
8753                 /*ReturnDevicePointer=*/false, C->isImplicit());
8754       }
8755
8756     // Look at the use_device_ptr clause information and mark the existing map
8757     // entries as such. If there is no map information for an entry in the
8758     // use_device_ptr list, we create one with map type 'alloc' and zero size
8759     // section. It is the user's fault if it was not mapped before. If there is
8760     // no map information and the pointer is a struct member, then we defer the
8761     // emission of that entry until the whole struct has been processed.
8762     llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
8763         DeferredInfo;
8764
8765     for (const auto *C :
8766          CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
8767       for (const auto L : C->component_lists()) {
8768         assert(!L.second.empty() && "Not expecting empty list of components!");
8769         const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
8770         VD = cast<ValueDecl>(VD->getCanonicalDecl());
8771         const Expr *IE = L.second.back().getAssociatedExpression();
8772         // If the first component is a member expression, we have to look into
8773         // 'this', which maps to null in the map of map information. Otherwise
8774         // look directly for the information.
8775         auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8776
8777         // We potentially have map information for this declaration already.
8778         // Look for the first set of components that refer to it.
8779         if (It != Info.end()) {
8780           auto CI = std::find_if(
8781               It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
8782                 return MI.Components.back().getAssociatedDeclaration() == VD;
8783               });
8784           // If we found a map entry, signal that the pointer has to be returned
8785           // and move on to the next declaration.
8786           if (CI != It->second.end()) {
8787             CI->ReturnDevicePointer = true;
8788             continue;
8789           }
8790         }
8791
8792         // We didn't find any match in our map information; generate a zero-size
8793         // array section. If the pointer is a struct member, we defer this
8794         // action until the whole struct has been processed.
8795         if (isa<MemberExpr>(IE)) {
8796           // Insert the pointer into Info to be processed by
8797           // generateInfoForComponentList. Because it is a member pointer
8798           // without a pointee, no entry will be generated for it, therefore
8799           // we need to generate one after the whole struct has been processed.
8800 // Nonetheless, generateInfoForComponentList must be called to take 8801 // the pointer into account for the calculation of the range of the 8802 // partial struct. 8803 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8804 /*ReturnDevicePointer=*/false, C->isImplicit()); 8805 DeferredInfo[nullptr].emplace_back(IE, VD); 8806 } else { 8807 llvm::Value *Ptr = 8808 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8809 BasePointers.emplace_back(Ptr, VD); 8810 Pointers.push_back(Ptr); 8811 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8812 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8813 } 8814 } 8815 } 8816 8817 for (const auto &M : Info) { 8818 // We need to know when we generate information for the first component 8819 // associated with a capture, because the mapping flags depend on it. 8820 bool IsFirstComponentList = true; 8821 8822 // Temporary versions of arrays 8823 MapBaseValuesArrayTy CurBasePointers; 8824 MapValuesArrayTy CurPointers; 8825 MapValuesArrayTy CurSizes; 8826 MapFlagsArrayTy CurTypes; 8827 StructRangeInfoTy PartialStruct; 8828 8829 for (const MapInfo &L : M.second) { 8830 assert(!L.Components.empty() && 8831 "Not expecting declaration with no component lists."); 8832 8833 // Remember the current base pointer index. 8834 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8835 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8836 CurBasePointers, CurPointers, CurSizes, 8837 CurTypes, PartialStruct, 8838 IsFirstComponentList, L.IsImplicit); 8839 8840 // If this entry relates with a device pointer, set the relevant 8841 // declaration and add the 'return pointer' flag. 8842 if (L.ReturnDevicePointer) { 8843 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8844 "Unexpected number of mapped base pointers."); 8845 8846 const ValueDecl *RelevantVD = 8847 L.Components.back().getAssociatedDeclaration(); 8848 assert(RelevantVD && 8849 "No relevant declaration related with device pointer??"); 8850 8851 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8852 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8853 } 8854 IsFirstComponentList = false; 8855 } 8856 8857 // Append any pending zero-length pointers which are struct members and 8858 // used with use_device_ptr. 8859 auto CI = DeferredInfo.find(M.first); 8860 if (CI != DeferredInfo.end()) { 8861 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8862 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8863 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8864 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8865 CurBasePointers.emplace_back(BasePtr, L.VD); 8866 CurPointers.push_back(Ptr); 8867 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8868 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8869 // value MEMBER_OF=FFFF so that the entry is later updated with the 8870 // correct value of MEMBER_OF. 8871 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8872 OMP_MAP_MEMBER_OF); 8873 } 8874 } 8875 8876 // If there is an entry in PartialStruct it means we have a struct with 8877 // individual members mapped. Emit an extra combined entry. 8878 if (PartialStruct.Base.isValid()) 8879 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8880 PartialStruct); 8881 8882 // We need to append the results of this capture to what we already have. 
8883 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8884 Pointers.append(CurPointers.begin(), CurPointers.end()); 8885 Sizes.append(CurSizes.begin(), CurSizes.end()); 8886 Types.append(CurTypes.begin(), CurTypes.end()); 8887 } 8888 } 8889 8890 /// Generate all the base pointers, section pointers, sizes and map types for 8891 /// the extracted map clauses of user-defined mapper. 8892 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8893 MapValuesArrayTy &Pointers, 8894 MapValuesArrayTy &Sizes, 8895 MapFlagsArrayTy &Types) const { 8896 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8897 "Expect a declare mapper directive"); 8898 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8899 // We have to process the component lists that relate with the same 8900 // declaration in a single chunk so that we can generate the map flags 8901 // correctly. Therefore, we organize all lists in a map. 8902 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8903 8904 // Helper function to fill the information map for the different supported 8905 // clauses. 8906 auto &&InfoGen = [&Info]( 8907 const ValueDecl *D, 8908 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8909 OpenMPMapClauseKind MapType, 8910 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8911 bool ReturnDevicePointer, bool IsImplicit) { 8912 const ValueDecl *VD = 8913 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8914 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8915 IsImplicit); 8916 }; 8917 8918 for (const auto *C : CurMapperDir->clauselists()) { 8919 const auto *MC = cast<OMPMapClause>(C); 8920 for (const auto L : MC->component_lists()) { 8921 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8922 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8923 } 8924 } 8925 8926 for (const auto &M : Info) { 8927 // We need to know when we generate information for the first component 8928 // associated with a capture, because the mapping flags depend on it. 8929 bool IsFirstComponentList = true; 8930 8931 // Temporary versions of arrays 8932 MapBaseValuesArrayTy CurBasePointers; 8933 MapValuesArrayTy CurPointers; 8934 MapValuesArrayTy CurSizes; 8935 MapFlagsArrayTy CurTypes; 8936 StructRangeInfoTy PartialStruct; 8937 8938 for (const MapInfo &L : M.second) { 8939 assert(!L.Components.empty() && 8940 "Not expecting declaration with no component lists."); 8941 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8942 CurBasePointers, CurPointers, CurSizes, 8943 CurTypes, PartialStruct, 8944 IsFirstComponentList, L.IsImplicit); 8945 IsFirstComponentList = false; 8946 } 8947 8948 // If there is an entry in PartialStruct it means we have a struct with 8949 // individual members mapped. Emit an extra combined entry. 8950 if (PartialStruct.Base.isValid()) 8951 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8952 PartialStruct); 8953 8954 // We need to append the results of this capture to what we already have. 8955 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8956 Pointers.append(CurPointers.begin(), CurPointers.end()); 8957 Sizes.append(CurSizes.begin(), CurSizes.end()); 8958 Types.append(CurTypes.begin(), CurTypes.end()); 8959 } 8960 } 8961 8962 /// Emit capture info for lambdas for variables captured by reference. 
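  /// For example (a hypothetical capture, for illustration only):
  /// \code
  ///   int X;
  ///   auto L = [&X]() { return X; };
  ///   // Mapping L emits, for each by-reference capture field (and for a
  ///   // captured 'this', if any), an entry flagged
  ///   // PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT relative to the
  ///   // lambda object itself.
  /// \endcode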
8963 void generateInfoForLambdaCaptures( 8964 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8965 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8966 MapFlagsArrayTy &Types, 8967 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8968 const auto *RD = VD->getType() 8969 .getCanonicalType() 8970 .getNonReferenceType() 8971 ->getAsCXXRecordDecl(); 8972 if (!RD || !RD->isLambda()) 8973 return; 8974 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8975 LValue VDLVal = CGF.MakeAddrLValue( 8976 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8977 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8978 FieldDecl *ThisCapture = nullptr; 8979 RD->getCaptureFields(Captures, ThisCapture); 8980 if (ThisCapture) { 8981 LValue ThisLVal = 8982 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8983 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8984 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8985 VDLVal.getPointer(CGF)); 8986 BasePointers.push_back(ThisLVal.getPointer(CGF)); 8987 Pointers.push_back(ThisLValVal.getPointer(CGF)); 8988 Sizes.push_back( 8989 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8990 CGF.Int64Ty, /*isSigned=*/true)); 8991 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8992 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8993 } 8994 for (const LambdaCapture &LC : RD->captures()) { 8995 if (!LC.capturesVariable()) 8996 continue; 8997 const VarDecl *VD = LC.getCapturedVar(); 8998 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8999 continue; 9000 auto It = Captures.find(VD); 9001 assert(It != Captures.end() && "Found lambda capture without field."); 9002 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 9003 if (LC.getCaptureKind() == LCK_ByRef) { 9004 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 9005 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 9006 VDLVal.getPointer(CGF)); 9007 BasePointers.push_back(VarLVal.getPointer(CGF)); 9008 Pointers.push_back(VarLValVal.getPointer(CGF)); 9009 Sizes.push_back(CGF.Builder.CreateIntCast( 9010 CGF.getTypeSize( 9011 VD->getType().getCanonicalType().getNonReferenceType()), 9012 CGF.Int64Ty, /*isSigned=*/true)); 9013 } else { 9014 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 9015 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 9016 VDLVal.getPointer(CGF)); 9017 BasePointers.push_back(VarLVal.getPointer(CGF)); 9018 Pointers.push_back(VarRVal.getScalarVal()); 9019 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 9020 } 9021 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 9022 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 9023 } 9024 } 9025 9026 /// Set correct indices for lambdas captures. 9027 void adjustMemberOfForLambdaCaptures( 9028 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 9029 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 9030 MapFlagsArrayTy &Types) const { 9031 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 9032 // Set correct member_of idx for all implicit lambda captures. 
9033       if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9034                        OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9035         continue;
9036       llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9037       assert(BasePtr && "Unable to find base lambda address.");
9038       int TgtIdx = -1;
9039       for (unsigned J = I; J > 0; --J) {
9040         unsigned Idx = J - 1;
9041         if (Pointers[Idx] != BasePtr)
9042           continue;
9043         TgtIdx = Idx;
9044         break;
9045       }
9046       assert(TgtIdx != -1 && "Unable to find parent lambda.");
9047       // All other current entries will be MEMBER_OF the combined entry
9048       // (except for PTR_AND_OBJ entries which do not have a placeholder value
9049       // 0xFFFF in the MEMBER_OF field).
9050       OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9051       setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9052     }
9053   }
9054
9055   /// Generate the base pointers, section pointers, sizes and map types
9056   /// associated to a given capture.
9057   void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9058                               llvm::Value *Arg,
9059                               MapBaseValuesArrayTy &BasePointers,
9060                               MapValuesArrayTy &Pointers,
9061                               MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
9062                               StructRangeInfoTy &PartialStruct) const {
9063     assert(!Cap->capturesVariableArrayType() &&
9064            "Not expecting to generate map info for a variable array type!");
9065
9066     // We need to know when we are generating information for the first component associated with a capture.
9067     const ValueDecl *VD = Cap->capturesThis()
9068                               ? nullptr
9069                               : Cap->getCapturedVar()->getCanonicalDecl();
9070
9071     // If this declaration appears in an is_device_ptr clause we just have to
9072     // pass the pointer by value. If it is a reference to a declaration, we just
9073     // pass its value.
9074     if (DevPointersMap.count(VD)) {
9075       BasePointers.emplace_back(Arg, VD);
9076       Pointers.push_back(Arg);
9077       Sizes.push_back(
9078           CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9079                                     CGF.Int64Ty, /*isSigned=*/true));
9080       Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
9081       return;
9082     }
9083
9084     using MapData =
9085         std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9086                    OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
9087     SmallVector<MapData, 4> DeclComponentLists;
9088     assert(CurDir.is<const OMPExecutableDirective *>() &&
9089            "Expect an executable directive");
9090     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9091     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9092       for (const auto L : C->decl_component_lists(VD)) {
9093         assert(L.first == VD &&
9094                "We got information for the wrong declaration??");
9095         assert(!L.second.empty() &&
9096                "Not expecting declaration with no component lists.");
9097         DeclComponentLists.emplace_back(L.second, C->getMapType(),
9098                                         C->getMapTypeModifiers(),
9099                                         C->isImplicit());
9100       }
9101     }
9102
9103     // Find overlapping elements (including the offset from the base element).
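    // Two component lists for the same declaration overlap when, walking both
    // lists from the base, one of them is exhausted first. A hypothetical
    // example:
    // \code
    //   #pragma omp target map(tofrom: s) map(to: s.f[:22])
    // \endcode
    // The component list of 's' is a prefix of the list of 's.f[:22]', so the
    // 's' list becomes the base entry and the 's.f[:22]' list is recorded as
    // one of its overlapped sub-components.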
9104     llvm::SmallDenseMap<
9105         const MapData *,
9106         llvm::SmallVector<
9107             OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9108         4>
9109         OverlappedData;
9110     size_t Count = 0;
9111     for (const MapData &L : DeclComponentLists) {
9112       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9113       OpenMPMapClauseKind MapType;
9114       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9115       bool IsImplicit;
9116       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
9117       ++Count;
9118       for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9119         OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9120         std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
9121         auto CI = Components.rbegin();
9122         auto CE = Components.rend();
9123         auto SI = Components1.rbegin();
9124         auto SE = Components1.rend();
9125         for (; CI != CE && SI != SE; ++CI, ++SI) {
9126           if (CI->getAssociatedExpression()->getStmtClass() !=
9127               SI->getAssociatedExpression()->getStmtClass())
9128             break;
9129           // Are we dealing with different variables/fields?
9130           if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9131             break;
9132         }
9133         // We found an overlap if, for at least one of the lists, we reached
9134         // the head of the component list.
9135         if (CI == CE || SI == SE) {
9136           assert((CI != CE || SI != SE) &&
9137                  "Unexpected full match of the mapping components.");
9138           const MapData &BaseData = CI == CE ? L : L1;
9139           OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9140               SI == SE ? Components : Components1;
9141           auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9142           OverlappedElements.getSecond().push_back(SubData);
9143         }
9144       }
9145     }
9146     // Sort the overlapped elements for each item.
9147     llvm::SmallVector<const FieldDecl *, 4> Layout;
9148     if (!OverlappedData.empty()) {
9149       if (const auto *CRD =
9150               VD->getType().getCanonicalType()->getAsCXXRecordDecl())
9151         getPlainLayout(CRD, Layout, /*AsBase=*/false);
9152       else {
9153         const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
9154         Layout.append(RD->field_begin(), RD->field_end());
9155       }
9156     }
9157     for (auto &Pair : OverlappedData) {
9158       llvm::sort(
9159           Pair.getSecond(),
9160           [&Layout](
9161               OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9162               OMPClauseMappableExprCommon::MappableExprComponentListRef
9163                   Second) {
9164             auto CI = First.rbegin();
9165             auto CE = First.rend();
9166             auto SI = Second.rbegin();
9167             auto SE = Second.rend();
9168             for (; CI != CE && SI != SE; ++CI, ++SI) {
9169               if (CI->getAssociatedExpression()->getStmtClass() !=
9170                   SI->getAssociatedExpression()->getStmtClass())
9171                 break;
9172               // Are we dealing with different variables/fields?
9173               if (CI->getAssociatedDeclaration() !=
9174                   SI->getAssociatedDeclaration())
9175                 break;
9176             }
9177
9178             // Lists contain the same elements.
9179             if (CI == CE && SI == SE)
9180               return false;
9181
9182             // A list with fewer elements is less than a list with more elements.
9183             if (CI == CE || SI == SE)
9184               return CI == CE;
9185
9186             const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9187             const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9188             if (FD1->getParent() == FD2->getParent())
9189               return FD1->getFieldIndex() < FD2->getFieldIndex();
9190             const auto It =
9191                 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9192                   return FD == FD1 || FD == FD2;
9193                 });
9194             return *It == FD1;
9195           });
9196     }
9197
9198     // The mapping flags depend on whether this is the first component list
9199     // associated with a capture. Handle the elements with overlapped data first.
9200     for (const auto &Pair : OverlappedData) {
9201       const MapData &L = *Pair.getFirst();
9202       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9203       OpenMPMapClauseKind MapType;
9204       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9205       bool IsImplicit;
9206       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
9207       ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9208           OverlappedComponents = Pair.getSecond();
9209       bool IsFirstComponentList = true;
9210       generateInfoForComponentList(MapType, MapModifiers, Components,
9211                                    BasePointers, Pointers, Sizes, Types,
9212                                    PartialStruct, IsFirstComponentList,
9213                                    IsImplicit, OverlappedComponents);
9214     }
9215     // Go through the other elements without overlapped elements.
9216     bool IsFirstComponentList = OverlappedData.empty();
9217     for (const MapData &L : DeclComponentLists) {
9218       OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9219       OpenMPMapClauseKind MapType;
9220       ArrayRef<OpenMPMapModifierKind> MapModifiers;
9221       bool IsImplicit;
9222       std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
9223       auto It = OverlappedData.find(&L);
9224       if (It == OverlappedData.end())
9225         generateInfoForComponentList(MapType, MapModifiers, Components,
9226                                      BasePointers, Pointers, Sizes, Types,
9227                                      PartialStruct, IsFirstComponentList,
9228                                      IsImplicit);
9229       IsFirstComponentList = false;
9230     }
9231   }
9232
9233   /// Generate the base pointers, section pointers, sizes and map types
9234   /// associated with the declare target link variables.
9235   void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
9236                                         MapValuesArrayTy &Pointers,
9237                                         MapValuesArrayTy &Sizes,
9238                                         MapFlagsArrayTy &Types) const {
9239     assert(CurDir.is<const OMPExecutableDirective *>() &&
9240            "Expect an executable directive");
9241     const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9242     // Map other list items in the map clause which are not captured variables
9243     // but "declare target link" global variables.
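    // A hypothetical example of such a list item:
    // \code
    //   int G;
    //   #pragma omp declare target link(G)
    //   void foo() {
    //   #pragma omp target map(tofrom: G)
    //     { ++G; }
    //   }
    // \endcode
    // G is not captured by the outlined target region, yet its map clause must
    // still produce entries (through the runtime reference of the link
    // variable).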
9244     for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9245       for (const auto L : C->component_lists()) {
9246         if (!L.first)
9247           continue;
9248         const auto *VD = dyn_cast<VarDecl>(L.first);
9249         if (!VD)
9250           continue;
9251         llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
9252             OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9253         if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
9254             !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
9255           continue;
9256         StructRangeInfoTy PartialStruct;
9257         generateInfoForComponentList(
9258             C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
9259             Pointers, Sizes, Types, PartialStruct,
9260             /*IsFirstComponentList=*/true, C->isImplicit());
9261         assert(!PartialStruct.Base.isValid() &&
9262                "No partial structs for declare target link expected.");
9263       }
9264     }
9265   }
9266
9267   /// Generate the default map information for a given capture \a CI,
9268   /// record field declaration \a RI and captured value \a CV.
9269   void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9270                               const FieldDecl &RI, llvm::Value *CV,
9271                               MapBaseValuesArrayTy &CurBasePointers,
9272                               MapValuesArrayTy &CurPointers,
9273                               MapValuesArrayTy &CurSizes,
9274                               MapFlagsArrayTy &CurMapTypes) const {
9275     bool IsImplicit = true;
9276     // Do the default mapping.
9277     if (CI.capturesThis()) {
9278       CurBasePointers.push_back(CV);
9279       CurPointers.push_back(CV);
9280       const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9281       CurSizes.push_back(
9282           CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9283                                     CGF.Int64Ty, /*isSigned=*/true));
9284       // Default map type.
9285       CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9286     } else if (CI.capturesVariableByCopy()) {
9287       CurBasePointers.push_back(CV);
9288       CurPointers.push_back(CV);
9289       if (!RI.getType()->isAnyPointerType()) {
9290         // We have to signal to the runtime which captures are passed by value
9291         // and are not pointers.
9292         CurMapTypes.push_back(OMP_MAP_LITERAL);
9293         CurSizes.push_back(CGF.Builder.CreateIntCast(
9294             CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9295       } else {
9296         // Pointers are implicitly mapped with a zero size and no flags
9297         // (other than the first map, which is added for all implicit maps).
9298         CurMapTypes.push_back(OMP_MAP_NONE);
9299         CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9300       }
9301       const VarDecl *VD = CI.getCapturedVar();
9302       auto I = FirstPrivateDecls.find(VD);
9303       if (I != FirstPrivateDecls.end())
9304         IsImplicit = I->getSecond();
9305     } else {
9306       assert(CI.capturesVariable() && "Expected captured reference.");
9307       const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9308       QualType ElementType = PtrTy->getPointeeType();
9309       CurSizes.push_back(CGF.Builder.CreateIntCast(
9310           CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9311       // The default map type for a scalar/complex type is 'to' because by
9312       // default the value doesn't have to be retrieved. For an aggregate
9313       // type, the default is 'tofrom'.
9314       CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
9315       const VarDecl *VD = CI.getCapturedVar();
9316       auto I = FirstPrivateDecls.find(VD);
9317       if (I != FirstPrivateDecls.end() &&
9318           VD->getType().isConstant(CGF.getContext())) {
9319         llvm::Constant *Addr =
9320             CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
9321         // Copy the value of the original variable to the new global copy.
9322 CGF.Builder.CreateMemCpy( 9323 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 9324 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 9325 CurSizes.back(), /*IsVolatile=*/false); 9326 // Use new global variable as the base pointers. 9327 CurBasePointers.push_back(Addr); 9328 CurPointers.push_back(Addr); 9329 } else { 9330 CurBasePointers.push_back(CV); 9331 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 9332 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 9333 CV, ElementType, CGF.getContext().getDeclAlign(VD), 9334 AlignmentSource::Decl)); 9335 CurPointers.push_back(PtrAddr.getPointer()); 9336 } else { 9337 CurPointers.push_back(CV); 9338 } 9339 } 9340 if (I != FirstPrivateDecls.end()) 9341 IsImplicit = I->getSecond(); 9342 } 9343 // Every default map produces a single argument which is a target parameter. 9344 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 9345 9346 // Add flag stating this is an implicit map. 9347 if (IsImplicit) 9348 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 9349 } 9350 }; 9351 } // anonymous namespace 9352 9353 /// Emit the arrays used to pass the captures and map information to the 9354 /// offloading runtime library. If there is no map or capture information, 9355 /// return nullptr by reference. 9356 static void 9357 emitOffloadingArrays(CodeGenFunction &CGF, 9358 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 9359 MappableExprsHandler::MapValuesArrayTy &Pointers, 9360 MappableExprsHandler::MapValuesArrayTy &Sizes, 9361 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 9362 CGOpenMPRuntime::TargetDataInfo &Info) { 9363 CodeGenModule &CGM = CGF.CGM; 9364 ASTContext &Ctx = CGF.getContext(); 9365 9366 // Reset the array information. 9367 Info.clearArrayInfo(); 9368 Info.NumberOfPtrs = BasePointers.size(); 9369 9370 if (Info.NumberOfPtrs) { 9371 // Detect if we have any capture size requiring runtime evaluation of the 9372 // size so that a constant array could be eventually used. 9373 bool hasRuntimeEvaluationCaptureSize = false; 9374 for (llvm::Value *S : Sizes) 9375 if (!isa<llvm::Constant>(S)) { 9376 hasRuntimeEvaluationCaptureSize = true; 9377 break; 9378 } 9379 9380 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9381 QualType PointerArrayType = Ctx.getConstantArrayType( 9382 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9383 /*IndexTypeQuals=*/0); 9384 9385 Info.BasePointersArray = 9386 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9387 Info.PointersArray = 9388 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9389 9390 // If we don't have any VLA types or other types that require runtime 9391 // evaluation, we can use a constant array for the map sizes, otherwise we 9392 // need to fill up the arrays as we do for the pointers. 9393 QualType Int64Ty = 9394 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9395 if (hasRuntimeEvaluationCaptureSize) { 9396 QualType SizeArrayType = Ctx.getConstantArrayType( 9397 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9398 /*IndexTypeQuals=*/0); 9399 Info.SizesArray = 9400 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9401 } else { 9402 // We expect all the sizes to be constant, so we collect them to create 9403 // a constant array. 
9404 SmallVector<llvm::Constant *, 16> ConstSizes; 9405 for (llvm::Value *S : Sizes) 9406 ConstSizes.push_back(cast<llvm::Constant>(S)); 9407 9408 auto *SizesArrayInit = llvm::ConstantArray::get( 9409 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9410 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9411 auto *SizesArrayGbl = new llvm::GlobalVariable( 9412 CGM.getModule(), SizesArrayInit->getType(), 9413 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9414 SizesArrayInit, Name); 9415 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9416 Info.SizesArray = SizesArrayGbl; 9417 } 9418 9419 // The map types are always constant so we don't need to generate code to 9420 // fill arrays. Instead, we create an array constant. 9421 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 9422 llvm::copy(MapTypes, Mapping.begin()); 9423 llvm::Constant *MapTypesArrayInit = 9424 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9425 std::string MaptypesName = 9426 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9427 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 9428 CGM.getModule(), MapTypesArrayInit->getType(), 9429 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9430 MapTypesArrayInit, MaptypesName); 9431 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9432 Info.MapTypesArray = MapTypesArrayGbl; 9433 9434 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9435 llvm::Value *BPVal = *BasePointers[I]; 9436 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9437 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9438 Info.BasePointersArray, 0, I); 9439 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9440 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9441 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9442 CGF.Builder.CreateStore(BPVal, BPAddr); 9443 9444 if (Info.requiresDevicePointerInfo()) 9445 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 9446 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9447 9448 llvm::Value *PVal = Pointers[I]; 9449 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9450 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9451 Info.PointersArray, 0, I); 9452 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9453 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9454 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9455 CGF.Builder.CreateStore(PVal, PAddr); 9456 9457 if (hasRuntimeEvaluationCaptureSize) { 9458 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9459 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9460 Info.SizesArray, 9461 /*Idx0=*/0, 9462 /*Idx1=*/I); 9463 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); 9464 CGF.Builder.CreateStore( 9465 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 9466 SAddr); 9467 } 9468 } 9469 } 9470 } 9471 9472 /// Emit the arguments to be passed to the runtime library based on the 9473 /// arrays of pointers, sizes and map types. 
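/// These arrays feed the libomptarget entry points; as a sketch (the
/// declaration below is abbreviated from the runtime interface and the
/// parameter names are illustrative):
/// \code
///   void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
///                                void **args_base, void **args,
///                                int64_t *arg_sizes, int64_t *arg_types);
/// \endcode
/// When there are no pointers, null arguments are passed instead of GEPs
/// into the (nonexistent) arrays.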
9474 static void emitOffloadingArraysArgument( 9475 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9476 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9477 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 9478 CodeGenModule &CGM = CGF.CGM; 9479 if (Info.NumberOfPtrs) { 9480 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9481 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9482 Info.BasePointersArray, 9483 /*Idx0=*/0, /*Idx1=*/0); 9484 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9485 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9486 Info.PointersArray, 9487 /*Idx0=*/0, 9488 /*Idx1=*/0); 9489 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9490 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9491 /*Idx0=*/0, /*Idx1=*/0); 9492 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9493 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9494 Info.MapTypesArray, 9495 /*Idx0=*/0, 9496 /*Idx1=*/0); 9497 } else { 9498 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9499 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9500 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9501 MapTypesArrayArg = 9502 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9503 } 9504 } 9505 9506 /// Check for inner distribute directive. 9507 static const OMPExecutableDirective * 9508 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9509 const auto *CS = D.getInnermostCapturedStmt(); 9510 const auto *Body = 9511 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9512 const Stmt *ChildStmt = 9513 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9514 9515 if (const auto *NestedDir = 9516 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9517 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9518 switch (D.getDirectiveKind()) { 9519 case OMPD_target: 9520 if (isOpenMPDistributeDirective(DKind)) 9521 return NestedDir; 9522 if (DKind == OMPD_teams) { 9523 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9524 /*IgnoreCaptured=*/true); 9525 if (!Body) 9526 return nullptr; 9527 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9528 if (const auto *NND = 9529 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9530 DKind = NND->getDirectiveKind(); 9531 if (isOpenMPDistributeDirective(DKind)) 9532 return NND; 9533 } 9534 } 9535 return nullptr; 9536 case OMPD_target_teams: 9537 if (isOpenMPDistributeDirective(DKind)) 9538 return NestedDir; 9539 return nullptr; 9540 case OMPD_target_parallel: 9541 case OMPD_target_simd: 9542 case OMPD_target_parallel_for: 9543 case OMPD_target_parallel_for_simd: 9544 return nullptr; 9545 case OMPD_target_teams_distribute: 9546 case OMPD_target_teams_distribute_simd: 9547 case OMPD_target_teams_distribute_parallel_for: 9548 case OMPD_target_teams_distribute_parallel_for_simd: 9549 case OMPD_parallel: 9550 case OMPD_for: 9551 case OMPD_parallel_for: 9552 case OMPD_parallel_master: 9553 case OMPD_parallel_sections: 9554 case OMPD_for_simd: 9555 case OMPD_parallel_for_simd: 9556 case OMPD_cancel: 9557 case OMPD_cancellation_point: 9558 case OMPD_ordered: 9559 case OMPD_threadprivate: 9560 case OMPD_allocate: 9561 case OMPD_task: 9562 case OMPD_simd: 9563 case OMPD_sections: 9564 case OMPD_section: 9565 case OMPD_single: 9566 case OMPD_master: 9567 case OMPD_critical: 9568 case 
OMPD_taskyield: 9569 case OMPD_barrier: 9570 case OMPD_taskwait: 9571 case OMPD_taskgroup: 9572 case OMPD_atomic: 9573 case OMPD_flush: 9574 case OMPD_depobj: 9575 case OMPD_scan: 9576 case OMPD_teams: 9577 case OMPD_target_data: 9578 case OMPD_target_exit_data: 9579 case OMPD_target_enter_data: 9580 case OMPD_distribute: 9581 case OMPD_distribute_simd: 9582 case OMPD_distribute_parallel_for: 9583 case OMPD_distribute_parallel_for_simd: 9584 case OMPD_teams_distribute: 9585 case OMPD_teams_distribute_simd: 9586 case OMPD_teams_distribute_parallel_for: 9587 case OMPD_teams_distribute_parallel_for_simd: 9588 case OMPD_target_update: 9589 case OMPD_declare_simd: 9590 case OMPD_declare_variant: 9591 case OMPD_begin_declare_variant: 9592 case OMPD_end_declare_variant: 9593 case OMPD_declare_target: 9594 case OMPD_end_declare_target: 9595 case OMPD_declare_reduction: 9596 case OMPD_declare_mapper: 9597 case OMPD_taskloop: 9598 case OMPD_taskloop_simd: 9599 case OMPD_master_taskloop: 9600 case OMPD_master_taskloop_simd: 9601 case OMPD_parallel_master_taskloop: 9602 case OMPD_parallel_master_taskloop_simd: 9603 case OMPD_requires: 9604 case OMPD_unknown: 9605 llvm_unreachable("Unexpected directive."); 9606 } 9607 } 9608 9609 return nullptr; 9610 } 9611 9612 /// Emit the user-defined mapper function. The code generation follows the 9613 /// pattern in the example below. 9614 /// \code 9615 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9616 /// void *base, void *begin, 9617 /// int64_t size, int64_t type) { 9618 /// // Allocate space for an array section first. 9619 /// if (size > 1 && !maptype.IsDelete) 9620 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9621 /// size*sizeof(Ty), clearToFrom(type)); 9622 /// // Map members. 9623 /// for (unsigned i = 0; i < size; i++) { 9624 /// // For each component specified by this mapper: 9625 /// for (auto c : all_components) { 9626 /// if (c.hasMapper()) 9627 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9628 /// c.arg_type); 9629 /// else 9630 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9631 /// c.arg_begin, c.arg_size, c.arg_type); 9632 /// } 9633 /// } 9634 /// // Delete the array section. 9635 /// if (size > 1 && maptype.IsDelete) 9636 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9637 /// size*sizeof(Ty), clearToFrom(type)); 9638 /// } 9639 /// \endcode 9640 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9641 CodeGenFunction *CGF) { 9642 if (UDMMap.count(D) > 0) 9643 return; 9644 ASTContext &C = CGM.getContext(); 9645 QualType Ty = D->getType(); 9646 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9647 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9648 auto *MapperVarDecl = 9649 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9650 SourceLocation Loc = D->getLocation(); 9651 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9652 9653 // Prepare mapper function arguments and attributes. 
9654   ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9655                               C.VoidPtrTy, ImplicitParamDecl::Other);
9656   ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9657                             ImplicitParamDecl::Other);
9658   ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9659                              C.VoidPtrTy, ImplicitParamDecl::Other);
9660   ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9661                             ImplicitParamDecl::Other);
9662   ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9663                             ImplicitParamDecl::Other);
9664   FunctionArgList Args;
9665   Args.push_back(&HandleArg);
9666   Args.push_back(&BaseArg);
9667   Args.push_back(&BeginArg);
9668   Args.push_back(&SizeArg);
9669   Args.push_back(&TypeArg);
9670   const CGFunctionInfo &FnInfo =
9671       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9672   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9673   SmallString<64> TyStr;
9674   llvm::raw_svector_ostream Out(TyStr);
9675   CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9676   std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9677   auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9678                                     Name, &CGM.getModule());
9679   CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9680   Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9681   // Start the mapper function code generation.
9682   CodeGenFunction MapperCGF(CGM);
9683   MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9684   // Compute the starting and end addresses of the array elements.
9685   llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9686       MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9687       C.getPointerType(Int64Ty), Loc);
9688   llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9689       MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
9690       CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
9691   llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
9692   llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9693       MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9694       C.getPointerType(Int64Ty), Loc);
9695   // Prepare common arguments for array initialization and deletion.
9696   llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9697       MapperCGF.GetAddrOfLocalVar(&HandleArg),
9698       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9699   llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9700       MapperCGF.GetAddrOfLocalVar(&BaseArg),
9701       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9702   llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9703       MapperCGF.GetAddrOfLocalVar(&BeginArg),
9704       /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9705
9706   // Emit array initialization if this is an array section and \p MapType
9707   // indicates that memory allocation is required.
9708   llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9709   emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9710                              ElementSize, HeadBB, /*IsInit=*/true);
9711
9712   // Emit a for loop to iterate through SizeArg elements and map all of them.
9713
9714   // Emit the loop header block.
9715   MapperCGF.EmitBlock(HeadBB);
9716   llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9717   llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9718   // Evaluate whether the initial condition is satisfied.
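  // The emitted control flow corresponds roughly to the following sketch in
  // C (names are illustrative):
  // \code
  //   char *p = begin, *e = end;           // bounds of the array section
  //   if (p == e) goto omp_done;           // the emptiness check below
  //   do { /* map the element at p */ p += element_size; } while (p != e);
  //   omp_done:;
  // \endcode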
9719 llvm::Value *IsEmpty = 9720 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9721 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9722 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9723 9724 // Emit the loop body block. 9725 MapperCGF.EmitBlock(BodyBB); 9726 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9727 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9728 PtrPHI->addIncoming(PtrBegin, EntryBB); 9729 Address PtrCurrent = 9730 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9731 .getAlignment() 9732 .alignmentOfArrayElement(ElementSize)); 9733 // Privatize the declared variable of mapper to be the current array element. 9734 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9735 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9736 return MapperCGF 9737 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9738 .getAddress(MapperCGF); 9739 }); 9740 (void)Scope.Privatize(); 9741 9742 // Get map clause information. Fill up the arrays with all mapped variables. 9743 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9744 MappableExprsHandler::MapValuesArrayTy Pointers; 9745 MappableExprsHandler::MapValuesArrayTy Sizes; 9746 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9747 MappableExprsHandler MEHandler(*D, MapperCGF); 9748 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 9749 9750 // Call the runtime API __tgt_mapper_num_components to get the number of 9751 // pre-existing components. 9752 llvm::Value *OffloadingArgs[] = {Handle}; 9753 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9754 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 9755 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9756 PreviousSize, 9757 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9758 9759 // Fill up the runtime mapper handle for all components. 9760 for (unsigned I = 0; I < BasePointers.size(); ++I) { 9761 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9762 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9763 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9764 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9765 llvm::Value *CurSizeArg = Sizes[I]; 9766 9767 // Extract the MEMBER_OF field from the map type. 9768 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9769 MapperCGF.EmitBlock(MemberBB); 9770 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 9771 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9772 OriMapType, 9773 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9774 llvm::BasicBlock *MemberCombineBB = 9775 MapperCGF.createBasicBlock("omp.member.combine"); 9776 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9777 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9778 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9779 // Add the number of pre-existing components to the MEMBER_OF field if it 9780 // is valid. 9781 MapperCGF.EmitBlock(MemberCombineBB); 9782 llvm::Value *CombinedMember = 9783 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9784 // Do nothing if it is not a member of previous components. 
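// TypeBB joins both paths: the PHI below yields the rebased map type when the
// MEMBER_OF bits were set, and the original map type otherwise.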
9785 MapperCGF.EmitBlock(TypeBB); 9786 llvm::PHINode *MemberMapType = 9787 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9788 MemberMapType->addIncoming(OriMapType, MemberBB); 9789 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9790 9791 // Combine the map type inherited from user-defined mapper with that 9792 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9793 // bits of the \a MapType, which is the input argument of the mapper 9794 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9795 // bits of MemberMapType. 9796 // [OpenMP 5.0], 1.2.6. map-type decay. 9797 // | alloc | to | from | tofrom | release | delete 9798 // ---------------------------------------------------------- 9799 // alloc | alloc | alloc | alloc | alloc | release | delete 9800 // to | alloc | to | alloc | to | release | delete 9801 // from | alloc | alloc | from | from | release | delete 9802 // tofrom | alloc | to | from | tofrom | release | delete 9803 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9804 MapType, 9805 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9806 MappableExprsHandler::OMP_MAP_FROM)); 9807 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9808 llvm::BasicBlock *AllocElseBB = 9809 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9810 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9811 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9812 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9813 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9814 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9815 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9816 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9817 MapperCGF.EmitBlock(AllocBB); 9818 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9819 MemberMapType, 9820 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9821 MappableExprsHandler::OMP_MAP_FROM))); 9822 MapperCGF.Builder.CreateBr(EndBB); 9823 MapperCGF.EmitBlock(AllocElseBB); 9824 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9825 LeftToFrom, 9826 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9827 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9828 // In case of to, clear OMP_MAP_FROM. 9829 MapperCGF.EmitBlock(ToBB); 9830 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9831 MemberMapType, 9832 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9833 MapperCGF.Builder.CreateBr(EndBB); 9834 MapperCGF.EmitBlock(ToElseBB); 9835 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9836 LeftToFrom, 9837 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9838 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9839 // In case of from, clear OMP_MAP_TO. 9840 MapperCGF.EmitBlock(FromBB); 9841 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9842 MemberMapType, 9843 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9844 // In case of tofrom, do nothing. 
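// For example, reading the table above: a component declared 'to' inside the
// mapper decays to 'alloc' when the mapper itself is invoked with 'from',
// because the from path below clears the OMP_MAP_TO bit.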
9845 MapperCGF.EmitBlock(EndBB); 9846 llvm::PHINode *CurMapType = 9847 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9848 CurMapType->addIncoming(AllocMapType, AllocBB); 9849 CurMapType->addIncoming(ToMapType, ToBB); 9850 CurMapType->addIncoming(FromMapType, FromBB); 9851 CurMapType->addIncoming(MemberMapType, ToElseBB); 9852 9853 // TODO: call the corresponding mapper function if a user-defined mapper is 9854 // associated with this map clause. 9855 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9856 // data structure. 9857 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9858 CurSizeArg, CurMapType}; 9859 MapperCGF.EmitRuntimeCall( 9860 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9861 OffloadingArgs); 9862 } 9863 9864 // Update the pointer to point to the next element that needs to be mapped, 9865 // and check whether we have mapped all elements. 9866 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9867 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9868 PtrPHI->addIncoming(PtrNext, BodyBB); 9869 llvm::Value *IsDone = 9870 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9871 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9872 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9873 9874 MapperCGF.EmitBlock(ExitBB); 9875 // Emit array deletion if this is an array section and \p MapType indicates 9876 // that deletion is required. 9877 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9878 ElementSize, DoneBB, /*IsInit=*/false); 9879 9880 // Emit the function exit block. 9881 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9882 MapperCGF.FinishFunction(); 9883 UDMMap.try_emplace(D, Fn); 9884 if (CGF) { 9885 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9886 Decls.second.push_back(D); 9887 } 9888 } 9889 9890 /// Emit the array initialization or deletion portion for user-defined mapper 9891 /// code generation. First, it evaluates whether an array section is mapped and 9892 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9893 /// true, and \a MapType indicates to not delete this array, array 9894 /// initialization code is generated. If \a IsInit is false, and \a MapType 9895 /// indicates to delete this array, array deletion code is generated. 9896 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9897 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9898 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9899 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9900 StringRef Prefix = IsInit ? ".init" : ".del"; 9901 9902 // Evaluate if this is an array section. 9903 llvm::BasicBlock *IsDeleteBB = 9904 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); 9905 llvm::BasicBlock *BodyBB = 9906 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9907 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9908 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9909 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9910 9911 // Evaluate if we are going to delete this section.
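// Note the inverted polarity: the init call runs the body only when the
// delete bit of \p MapType is clear, while the del call runs it only when the
// delete bit is set.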
9912 MapperCGF.EmitBlock(IsDeleteBB); 9913 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9914 MapType, 9915 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9916 llvm::Value *DeleteCond; 9917 if (IsInit) { 9918 DeleteCond = MapperCGF.Builder.CreateIsNull( 9919 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9920 } else { 9921 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9922 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9923 } 9924 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9925 9926 MapperCGF.EmitBlock(BodyBB); 9927 // Get the array size by multiplying the element size and the number of 9928 // elements (i.e., \p Size). 9929 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9930 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9931 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it is used 9932 // for memory allocation/deletion purposes only. 9933 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9934 MapType, 9935 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9936 MappableExprsHandler::OMP_MAP_FROM))); 9937 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9938 // data structure. 9939 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9940 MapperCGF.EmitRuntimeCall( 9941 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9942 } 9943 9944 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9945 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9946 llvm::Value *DeviceID, 9947 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9948 const OMPLoopDirective &D)> 9949 SizeEmitter) { 9950 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9951 const OMPExecutableDirective *TD = &D; 9952 // Get the nested teams distribute directive, if any. 9953 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9954 TD = getNestedDistributeDirective(CGM.getContext(), D); 9955 if (!TD) 9956 return; 9957 const auto *LD = cast<OMPLoopDirective>(TD); 9958 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9959 PrePostActionTy &) { 9960 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9961 llvm::Value *Args[] = {DeviceID, NumIterations}; 9962 CGF.EmitRuntimeCall( 9963 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9964 } 9965 }; 9966 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9967 } 9968 9969 void CGOpenMPRuntime::emitTargetCall( 9970 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9971 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9972 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9973 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9974 const OMPLoopDirective &D)> 9975 SizeEmitter) { 9976 if (!CGF.HaveInsertPoint()) 9977 return; 9978 9979 assert(OutlinedFn && "Invalid outlined function!"); 9980 9981 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9982 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9983 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9984 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9985 PrePostActionTy &) { 9986 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9987 }; 9988 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9989 9990 CodeGenFunction::OMPTargetDataInfo InputInfo; 9991 llvm::Value *MapTypesArray = nullptr; 9992 // Fill up the pointer arrays and transfer execution to the device.
9993 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9994 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9995 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9996 if (Device.getInt() == OMPC_DEVICE_ancestor) { 9997 // Reverse offloading is not supported, so just execute on the host. 9998 if (RequiresOuterTask) { 9999 CapturedVars.clear(); 10000 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10001 } 10002 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10003 return; 10004 } 10005 10006 // On top of the arrays that were filled up, the target offloading call 10007 // takes as arguments the device id as well as the host pointer. The host 10008 // pointer is used by the runtime library to identify the current target 10009 // region, so it only has to be unique and not necessarily point to 10010 // anything. It could be the pointer to the outlined function that 10011 // implements the target region, but we aren't using it, so the compiler 10012 // doesn't need to keep it alive, and could therefore inline the host 10013 // function if proven worthwhile during optimization. 10014 10015 // From this point on, we need to have an ID of the target region defined. 10016 assert(OutlinedFnID && "Invalid outlined function ID!"); 10017 10018 // Emit device ID if any. 10019 llvm::Value *DeviceID; 10020 if (Device.getPointer()) { 10021 assert((Device.getInt() == OMPC_DEVICE_unknown || 10022 Device.getInt() == OMPC_DEVICE_device_num) && 10023 "Expected device_num modifier."); 10024 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); 10025 DeviceID = 10026 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); 10027 } else { 10028 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10029 } 10030 10031 // Emit the number of elements in the offloading arrays. 10032 llvm::Value *PointerNum = 10033 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10034 10035 // Return value of the runtime offloading call. 10036 llvm::Value *Return; 10037 10038 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 10039 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 10040 10041 // Emit tripcount for the target loop-based directive. 10042 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 10043 10044 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10045 // The target region is an outlined function launched by the runtime 10046 // via calls to __tgt_target() or __tgt_target_teams(). 10047 // 10048 // __tgt_target() launches a target region with one team and one thread, 10049 // executing a serial region. This master thread may in turn launch 10050 // more threads within its team upon encountering a parallel region; 10051 // however, no additional teams can be launched on the device. 10052 // 10053 // __tgt_target_teams() launches a target region with one or more teams, 10054 // each with one or more threads. This call is required for target 10055 // constructs such as: 10056 // 'target teams' 10057 // 'target' / 'teams' 10058 // 'target teams distribute parallel for' 10059 // 'target parallel' 10060 // and so on. 10061 // 10062 // Note that on the host and CPU targets, the runtime implementation of 10063 // these calls simply calls the outlined function without forking threads. 10064 // The outlined functions themselves have runtime calls to 10065 // __kmpc_fork_teams() and __kmpc_fork_call() for this purpose, codegen'd by 10066 // the compiler in emitTeamsCall() and emitParallelCall().
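// For instance, a host fallback of a 'target teams' region goes through
// __tgt_target_teams(), whose CPU implementation simply invokes the outlined
// function, which in turn calls __kmpc_fork_teams() to create the teams.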
10067 // 10068 // In contrast, on the NVPTX target, the implementation of 10069 // __tgt_target_teams() launches a GPU kernel with the requested number 10070 // of teams and threads so no additional calls to the runtime are required. 10071 if (NumTeams) { 10072 // If we have NumTeams defined this means that we have an enclosed teams 10073 // region. Therefore we also expect to have NumThreads defined. These two 10074 // values should be defined in the presence of a teams directive, 10075 // regardless of having any clauses associated. If the user is using teams 10076 // but no clauses, these two values will be the default that should be 10077 // passed to the runtime library - a 32-bit integer with the value zero. 10078 assert(NumThreads && "Thread limit expression should be available along " 10079 "with number of teams."); 10080 llvm::Value *OffloadingArgs[] = {DeviceID, 10081 OutlinedFnID, 10082 PointerNum, 10083 InputInfo.BasePointersArray.getPointer(), 10084 InputInfo.PointersArray.getPointer(), 10085 InputInfo.SizesArray.getPointer(), 10086 MapTypesArray, 10087 NumTeams, 10088 NumThreads}; 10089 Return = CGF.EmitRuntimeCall( 10090 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 10091 : OMPRTL__tgt_target_teams), 10092 OffloadingArgs); 10093 } else { 10094 llvm::Value *OffloadingArgs[] = {DeviceID, 10095 OutlinedFnID, 10096 PointerNum, 10097 InputInfo.BasePointersArray.getPointer(), 10098 InputInfo.PointersArray.getPointer(), 10099 InputInfo.SizesArray.getPointer(), 10100 MapTypesArray}; 10101 Return = CGF.EmitRuntimeCall( 10102 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait 10103 : OMPRTL__tgt_target), 10104 OffloadingArgs); 10105 } 10106 10107 // Check the error code and execute the host version if required. 10108 llvm::BasicBlock *OffloadFailedBlock = 10109 CGF.createBasicBlock("omp_offload.failed"); 10110 llvm::BasicBlock *OffloadContBlock = 10111 CGF.createBasicBlock("omp_offload.cont"); 10112 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 10113 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 10114 10115 CGF.EmitBlock(OffloadFailedBlock); 10116 if (RequiresOuterTask) { 10117 CapturedVars.clear(); 10118 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10119 } 10120 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10121 CGF.EmitBranch(OffloadContBlock); 10122 10123 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 10124 }; 10125 10126 // Notify that the host version must be executed. 10127 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 10128 RequiresOuterTask](CodeGenFunction &CGF, 10129 PrePostActionTy &) { 10130 if (RequiresOuterTask) { 10131 CapturedVars.clear(); 10132 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 10133 } 10134 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 10135 }; 10136 10137 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 10138 &CapturedVars, RequiresOuterTask, 10139 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 10140 // Fill up the arrays with all the captured variables. 10141 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10142 MappableExprsHandler::MapValuesArrayTy Pointers; 10143 MappableExprsHandler::MapValuesArrayTy Sizes; 10144 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10145 10146 // Get mappable expression information. 
10147 MappableExprsHandler MEHandler(D, CGF); 10148 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 10149 10150 auto RI = CS.getCapturedRecordDecl()->field_begin(); 10151 auto CV = CapturedVars.begin(); 10152 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 10153 CE = CS.capture_end(); 10154 CI != CE; ++CI, ++RI, ++CV) { 10155 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 10156 MappableExprsHandler::MapValuesArrayTy CurPointers; 10157 MappableExprsHandler::MapValuesArrayTy CurSizes; 10158 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 10159 MappableExprsHandler::StructRangeInfoTy PartialStruct; 10160 10161 // VLA sizes are passed to the outlined region by copy and do not have map 10162 // information associated. 10163 if (CI->capturesVariableArrayType()) { 10164 CurBasePointers.push_back(*CV); 10165 CurPointers.push_back(*CV); 10166 CurSizes.push_back(CGF.Builder.CreateIntCast( 10167 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 10168 // Copy to the device as an argument. No need to retrieve it. 10169 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 10170 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 10171 MappableExprsHandler::OMP_MAP_IMPLICIT); 10172 } else { 10173 // If we have any information in the map clause, we use it; otherwise we 10174 // just do a default mapping. 10175 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 10176 CurSizes, CurMapTypes, PartialStruct); 10177 if (CurBasePointers.empty()) 10178 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 10179 CurPointers, CurSizes, CurMapTypes); 10180 // Generate correct mapping for variables captured by reference in 10181 // lambdas. 10182 if (CI->capturesVariable()) 10183 MEHandler.generateInfoForLambdaCaptures( 10184 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 10185 CurMapTypes, LambdaPointers); 10186 } 10187 // We expect to have at least an element of information for this capture. 10188 assert(!CurBasePointers.empty() && 10189 "Non-existing map pointer for capture!"); 10190 assert(CurBasePointers.size() == CurPointers.size() && 10191 CurBasePointers.size() == CurSizes.size() && 10192 CurBasePointers.size() == CurMapTypes.size() && 10193 "Inconsistent map information sizes!"); 10194 10195 // If there is an entry in PartialStruct it means we have a struct with 10196 // individual members mapped. Emit an extra combined entry. 10197 if (PartialStruct.Base.isValid()) 10198 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 10199 CurMapTypes, PartialStruct); 10200 10201 // We need to append the results of this capture to what we already have. 10202 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 10203 Pointers.append(CurPointers.begin(), CurPointers.end()); 10204 Sizes.append(CurSizes.begin(), CurSizes.end()); 10205 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 10206 } 10207 // Adjust MEMBER_OF flags for the lambda captures. 10208 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 10209 Pointers, MapTypes); 10210 // Map other list items in the map clause which are not captured variables 10211 // but "declare target link" global variables. 10212 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 10213 MapTypes); 10214 10215 TargetDataInfo Info; 10216 // Fill up the arrays and create the arguments.
10217 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10218 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10219 Info.PointersArray, Info.SizesArray, 10220 Info.MapTypesArray, Info); 10221 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10222 InputInfo.BasePointersArray = 10223 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10224 InputInfo.PointersArray = 10225 Address(Info.PointersArray, CGM.getPointerAlign()); 10226 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 10227 MapTypesArray = Info.MapTypesArray; 10228 if (RequiresOuterTask) 10229 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10230 else 10231 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10232 }; 10233 10234 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 10235 CodeGenFunction &CGF, PrePostActionTy &) { 10236 if (RequiresOuterTask) { 10237 CodeGenFunction::OMPTargetDataInfo InputInfo; 10238 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 10239 } else { 10240 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 10241 } 10242 }; 10243 10244 // If we have a target function ID it means that we need to support 10245 // offloading; otherwise, just execute on the host. We need to execute on the 10246 // host regardless of the conditional in the if clause if, e.g., the user does 10247 // not specify target triples. 10248 if (OutlinedFnID) { 10249 if (IfCond) { 10250 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 10251 } else { 10252 RegionCodeGenTy ThenRCG(TargetThenGen); 10253 ThenRCG(CGF); 10254 } 10255 } else { 10256 RegionCodeGenTy ElseRCG(TargetElseGen); 10257 ElseRCG(CGF); 10258 } 10259 } 10260 10261 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 10262 StringRef ParentName) { 10263 if (!S) 10264 return; 10265 10266 // Codegen OMP target directives that offload compute to the device. 10267 bool RequiresDeviceCodegen = 10268 isa<OMPExecutableDirective>(S) && 10269 isOpenMPTargetExecutionDirective( 10270 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10271 10272 if (RequiresDeviceCodegen) { 10273 const auto &E = *cast<OMPExecutableDirective>(S); 10274 unsigned DeviceID; 10275 unsigned FileID; 10276 unsigned Line; 10277 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 10278 FileID, Line); 10279 10280 // Is this a target region that should not be emitted as an entry point? If 10281 // so just signal we are done with this target region.
10282 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10283 ParentName, Line)) 10284 return; 10285 10286 switch (E.getDirectiveKind()) { 10287 case OMPD_target: 10288 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10289 cast<OMPTargetDirective>(E)); 10290 break; 10291 case OMPD_target_parallel: 10292 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10293 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10294 break; 10295 case OMPD_target_teams: 10296 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10297 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10298 break; 10299 case OMPD_target_teams_distribute: 10300 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10301 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10302 break; 10303 case OMPD_target_teams_distribute_simd: 10304 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10305 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10306 break; 10307 case OMPD_target_parallel_for: 10308 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10309 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10310 break; 10311 case OMPD_target_parallel_for_simd: 10312 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10313 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10314 break; 10315 case OMPD_target_simd: 10316 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10317 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10318 break; 10319 case OMPD_target_teams_distribute_parallel_for: 10320 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10321 CGM, ParentName, 10322 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10323 break; 10324 case OMPD_target_teams_distribute_parallel_for_simd: 10325 CodeGenFunction:: 10326 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10327 CGM, ParentName, 10328 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10329 break; 10330 case OMPD_parallel: 10331 case OMPD_for: 10332 case OMPD_parallel_for: 10333 case OMPD_parallel_master: 10334 case OMPD_parallel_sections: 10335 case OMPD_for_simd: 10336 case OMPD_parallel_for_simd: 10337 case OMPD_cancel: 10338 case OMPD_cancellation_point: 10339 case OMPD_ordered: 10340 case OMPD_threadprivate: 10341 case OMPD_allocate: 10342 case OMPD_task: 10343 case OMPD_simd: 10344 case OMPD_sections: 10345 case OMPD_section: 10346 case OMPD_single: 10347 case OMPD_master: 10348 case OMPD_critical: 10349 case OMPD_taskyield: 10350 case OMPD_barrier: 10351 case OMPD_taskwait: 10352 case OMPD_taskgroup: 10353 case OMPD_atomic: 10354 case OMPD_flush: 10355 case OMPD_depobj: 10356 case OMPD_scan: 10357 case OMPD_teams: 10358 case OMPD_target_data: 10359 case OMPD_target_exit_data: 10360 case OMPD_target_enter_data: 10361 case OMPD_distribute: 10362 case OMPD_distribute_simd: 10363 case OMPD_distribute_parallel_for: 10364 case OMPD_distribute_parallel_for_simd: 10365 case OMPD_teams_distribute: 10366 case OMPD_teams_distribute_simd: 10367 case OMPD_teams_distribute_parallel_for: 10368 case OMPD_teams_distribute_parallel_for_simd: 10369 case OMPD_target_update: 10370 case OMPD_declare_simd: 10371 case OMPD_declare_variant: 10372 case OMPD_begin_declare_variant: 10373 case OMPD_end_declare_variant: 10374 case OMPD_declare_target: 10375 case OMPD_end_declare_target: 10376 case OMPD_declare_reduction: 10377 case OMPD_declare_mapper: 10378 case OMPD_taskloop: 10379 case OMPD_taskloop_simd: 10380 case 
OMPD_master_taskloop: 10381 case OMPD_master_taskloop_simd: 10382 case OMPD_parallel_master_taskloop: 10383 case OMPD_parallel_master_taskloop_simd: 10384 case OMPD_requires: 10385 case OMPD_unknown: 10386 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10387 } 10388 return; 10389 } 10390 10391 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 10392 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10393 return; 10394 10395 scanForTargetRegionsFunctions( 10396 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 10397 return; 10398 } 10399 10400 // If this is a lambda function, look into its body. 10401 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10402 S = L->getBody(); 10403 10404 // Keep looking for target regions recursively. 10405 for (const Stmt *II : S->children()) 10406 scanForTargetRegionsFunctions(II, ParentName); 10407 } 10408 10409 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10410 // If emitting code for the host, we do not process FD here. Instead we do 10411 // the normal code generation. 10412 if (!CGM.getLangOpts().OpenMPIsDevice) { 10413 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 10414 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10415 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10416 // Do not emit device_type(nohost) functions for the host. 10417 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10418 return true; 10419 } 10420 return false; 10421 } 10422 10423 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10424 // Try to detect target regions in the function. 10425 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10426 StringRef Name = CGM.getMangledName(GD); 10427 scanForTargetRegionsFunctions(FD->getBody(), Name); 10428 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10429 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10430 // Do not emit device_type(host) functions for the device. 10431 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10432 return true; 10433 } 10434 10435 // Do not emit the function if it is not marked as declare target. 10436 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10437 AlreadyEmittedTargetDecls.count(VD) == 0; 10438 } 10439 10440 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10441 if (!CGM.getLangOpts().OpenMPIsDevice) 10442 return false; 10443 10444 // Check if there are Ctors/Dtors in this declaration and look for target 10445 // regions in it. We use the complete variant to produce the kernel name 10446 // mangling. 10447 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10448 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10449 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10450 StringRef ParentName = 10451 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10452 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10453 } 10454 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10455 StringRef ParentName = 10456 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10457 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10458 } 10459 } 10460 10461 // Do not emit the variable if it is not marked as declare target.
10462 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10463 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10464 cast<VarDecl>(GD.getDecl())); 10465 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10466 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10467 HasRequiresUnifiedSharedMemory)) { 10468 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10469 return true; 10470 } 10471 return false; 10472 } 10473 10474 llvm::Constant * 10475 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10476 const VarDecl *VD) { 10477 assert(VD->getType().isConstant(CGM.getContext()) && 10478 "Expected constant variable."); 10479 StringRef VarName; 10480 llvm::Constant *Addr; 10481 llvm::GlobalValue::LinkageTypes Linkage; 10482 QualType Ty = VD->getType(); 10483 SmallString<128> Buffer; 10484 { 10485 unsigned DeviceID; 10486 unsigned FileID; 10487 unsigned Line; 10488 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10489 FileID, Line); 10490 llvm::raw_svector_ostream OS(Buffer); 10491 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10492 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10493 VarName = OS.str(); 10494 } 10495 Linkage = llvm::GlobalValue::InternalLinkage; 10496 Addr = 10497 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10498 getDefaultFirstprivateAddressSpace()); 10499 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10500 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10501 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10502 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10503 VarName, Addr, VarSize, 10504 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10505 return Addr; 10506 } 10507 10508 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10509 llvm::Constant *Addr) { 10510 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10511 !CGM.getLangOpts().OpenMPIsDevice) 10512 return; 10513 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10514 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10515 if (!Res) { 10516 if (CGM.getLangOpts().OpenMPIsDevice) { 10517 // Register non-target variables being emitted in device code (debug info 10518 // may cause this). 10519 StringRef VarName = CGM.getMangledName(VD); 10520 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10521 } 10522 return; 10523 } 10524 // Register declare target variables. 10525 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10526 StringRef VarName; 10527 CharUnits VarSize; 10528 llvm::GlobalValue::LinkageTypes Linkage; 10529 10530 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10531 !HasRequiresUnifiedSharedMemory) { 10532 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10533 VarName = CGM.getMangledName(VD); 10534 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10535 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10536 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10537 } else { 10538 VarSize = CharUnits::Zero(); 10539 } 10540 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10541 // Temp solution to prevent optimizations of the internal variables. 
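// The emitted workaround is an internal constant global whose initializer is
// the address of the variable; adding it to llvm.compiler.used keeps the
// variable from being dropped or localized on the device.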
10542 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10543 std::string RefName = getName({VarName, "ref"}); 10544 if (!CGM.GetGlobalValue(RefName)) { 10545 llvm::Constant *AddrRef = 10546 getOrCreateInternalVariable(Addr->getType(), RefName); 10547 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10548 GVAddrRef->setConstant(/*Val=*/true); 10549 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10550 GVAddrRef->setInitializer(Addr); 10551 CGM.addCompilerUsedGlobal(GVAddrRef); 10552 } 10553 } 10554 } else { 10555 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10556 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10557 HasRequiresUnifiedSharedMemory)) && 10558 "Declare target attribute must be 'link' or 'to' with unified memory."); 10559 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10560 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10561 else 10562 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10563 10564 if (CGM.getLangOpts().OpenMPIsDevice) { 10565 VarName = Addr->getName(); 10566 Addr = nullptr; 10567 } else { 10568 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10569 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10570 } 10571 VarSize = CGM.getPointerSize(); 10572 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10573 } 10574 10575 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10576 VarName, Addr, VarSize, Flags, Linkage); 10577 } 10578 10579 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10580 if (isa<FunctionDecl>(GD.getDecl()) || 10581 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10582 return emitTargetFunctions(GD); 10583 10584 return emitTargetGlobalVariable(GD); 10585 } 10586 10587 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10588 for (const VarDecl *VD : DeferredGlobalVariables) { 10589 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10590 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10591 if (!Res) 10592 continue; 10593 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10594 !HasRequiresUnifiedSharedMemory) { 10595 CGM.EmitGlobal(VD); 10596 } else { 10597 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10598 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10599 HasRequiresUnifiedSharedMemory)) && 10600 "Expected link clause or to clause with unified memory."); 10601 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10602 } 10603 } 10604 } 10605 10606 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10607 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10608 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10609 "Expected target-based directive."); 10610 } 10611 10612 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10613 for (const OMPClause *Clause : D->clauselists()) { 10614 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10615 HasRequiresUnifiedSharedMemory = true; 10616 } else if (const auto *AC = 10617 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10618 switch (AC->getAtomicDefaultMemOrderKind()) { 10619 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10620 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10621 break; 10622 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10623 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10624 break; 10625 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10626 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10627 break; 10628 case 
OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10629 break; 10630 } 10631 } 10632 } 10633 } 10634 10635 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10636 return RequiresAtomicOrdering; 10637 } 10638 10639 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10640 LangAS &AS) { 10641 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10642 return false; 10643 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10644 switch (A->getAllocatorType()) { 10645 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10646 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10647 // Not supported; fall back to the default mem space. 10648 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10649 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10650 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10651 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10652 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10653 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10654 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10655 AS = LangAS::Default; 10656 return true; 10657 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10658 llvm_unreachable("Expected predefined allocator for the variables with " 10659 "static storage."); 10660 } 10661 return false; 10662 } 10663 10664 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10665 return HasRequiresUnifiedSharedMemory; 10666 } 10667 10668 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10669 CodeGenModule &CGM) 10670 : CGM(CGM) { 10671 if (CGM.getLangOpts().OpenMPIsDevice) { 10672 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10673 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10674 } 10675 } 10676 10677 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10678 if (CGM.getLangOpts().OpenMPIsDevice) 10679 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10680 } 10681 10682 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10683 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10684 return true; 10685 10686 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10687 // Do not emit the function if it is marked as declare target, as it was 10688 // already emitted. 10689 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10690 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10691 if (auto *F = dyn_cast_or_null<llvm::Function>( 10692 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10693 return !F->isDeclaration(); 10694 return false; 10695 } 10696 return true; 10697 } 10698 10699 return !AlreadyEmittedTargetDecls.insert(D).second; 10700 } 10701 10702 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10703 // If we don't have entries or if we are emitting code for the device, we 10704 // don't need to do anything. 10705 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10706 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10707 (OffloadEntriesInfoManager.empty() && 10708 !HasEmittedDeclareTargetRegion && 10709 !HasEmittedTargetRegion)) 10710 return nullptr; 10711 10712 // Create and register the function that handles the requires directives.
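// The emitted registration function is roughly (a sketch; the exact name is
// produced by getName below):
// \code
// void .omp_offloading.requires_reg() {
//   __tgt_register_requires(flags);
// }
// \endcode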
10713 ASTContext &C = CGM.getContext(); 10714 10715 llvm::Function *RequiresRegFn; 10716 { 10717 CodeGenFunction CGF(CGM); 10718 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10719 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10720 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10721 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 10722 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10723 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10724 // TODO: check for other requires clauses. 10725 // The requires directive takes effect only when a target region is 10726 // present in the compilation unit. Otherwise it is ignored and not 10727 // passed to the runtime. This prevents the runtime from throwing an error 10728 // for mismatching requires clauses across compilation units that don't 10729 // contain at least one target region. 10730 assert((HasEmittedTargetRegion || 10731 HasEmittedDeclareTargetRegion || 10732 !OffloadEntriesInfoManager.empty()) && 10733 "Target or declare target region expected."); 10734 if (HasRequiresUnifiedSharedMemory) 10735 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10736 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 10737 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10738 CGF.FinishFunction(); 10739 } 10740 return RequiresRegFn; 10741 } 10742 10743 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10744 const OMPExecutableDirective &D, 10745 SourceLocation Loc, 10746 llvm::Function *OutlinedFn, 10747 ArrayRef<llvm::Value *> CapturedVars) { 10748 if (!CGF.HaveInsertPoint()) 10749 return; 10750 10751 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10752 CodeGenFunction::RunCleanupsScope Scope(CGF); 10753 10754 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10755 llvm::Value *Args[] = { 10756 RTLoc, 10757 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10758 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10759 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10760 RealArgs.append(std::begin(Args), std::end(Args)); 10761 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10762 10763 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 10764 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10765 } 10766 10767 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10768 const Expr *NumTeams, 10769 const Expr *ThreadLimit, 10770 SourceLocation Loc) { 10771 if (!CGF.HaveInsertPoint()) 10772 return; 10773 10774 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10775 10776 llvm::Value *NumTeamsVal = 10777 NumTeams 10778 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10779 CGF.CGM.Int32Ty, /* isSigned = */ true) 10780 : CGF.Builder.getInt32(0); 10781 10782 llvm::Value *ThreadLimitVal = 10783 ThreadLimit 10784 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10785 CGF.CGM.Int32Ty, /* isSigned = */ true) 10786 : CGF.Builder.getInt32(0); 10787 10788 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit) 10789 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10790 ThreadLimitVal}; 10791 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 10792 PushNumTeamsArgs); 10793 } 10794 10795 void CGOpenMPRuntime::emitTargetDataCalls( 10796 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10797 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10798 if (!CGF.HaveInsertPoint()) 10799 return; 10800 10801 // Action used to replace the default codegen action and turn privatization 10802 // off. 10803 PrePostActionTy NoPrivAction; 10804 10805 // Generate the code for the opening of the data environment. Capture all the 10806 // arguments of the runtime call by reference because they are used in the 10807 // closing of the region. 10808 auto &&BeginThenGen = [this, &D, Device, &Info, 10809 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10810 // Fill up the arrays with all the mapped variables. 10811 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10812 MappableExprsHandler::MapValuesArrayTy Pointers; 10813 MappableExprsHandler::MapValuesArrayTy Sizes; 10814 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10815 10816 // Get map clause information. 10817 MappableExprsHandler MCHandler(D, CGF); 10818 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10819 10820 // Fill up the arrays and create the arguments. 10821 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10822 10823 llvm::Value *BasePointersArrayArg = nullptr; 10824 llvm::Value *PointersArrayArg = nullptr; 10825 llvm::Value *SizesArrayArg = nullptr; 10826 llvm::Value *MapTypesArrayArg = nullptr; 10827 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10828 SizesArrayArg, MapTypesArrayArg, Info); 10829 10830 // Emit device ID if any. 10831 llvm::Value *DeviceID = nullptr; 10832 if (Device) { 10833 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10834 CGF.Int64Ty, /*isSigned=*/true); 10835 } else { 10836 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10837 } 10838 10839 // Emit the number of elements in the offloading arrays. 10840 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10841 10842 llvm::Value *OffloadingArgs[] = { 10843 DeviceID, PointerNum, BasePointersArrayArg, 10844 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10845 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10846 OffloadingArgs); 10847 10848 // If device pointer privatization is required, emit the body of the region 10849 // here. It will have to be duplicated: with and without privatization. 10850 if (!Info.CaptureDeviceAddrMap.empty()) 10851 CodeGen(CGF); 10852 }; 10853 10854 // Generate code for the closing of the data region.
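// EndThenGen mirrors BeginThenGen: it rebuilds the same array arguments and
// device ID from Info but calls __tgt_target_data_end instead.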
10855 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10856 PrePostActionTy &) { 10857 assert(Info.isValid() && "Invalid data environment closing arguments."); 10858 10859 llvm::Value *BasePointersArrayArg = nullptr; 10860 llvm::Value *PointersArrayArg = nullptr; 10861 llvm::Value *SizesArrayArg = nullptr; 10862 llvm::Value *MapTypesArrayArg = nullptr; 10863 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10864 SizesArrayArg, MapTypesArrayArg, Info); 10865 10866 // Emit device ID if any. 10867 llvm::Value *DeviceID = nullptr; 10868 if (Device) { 10869 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10870 CGF.Int64Ty, /*isSigned=*/true); 10871 } else { 10872 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10873 } 10874 10875 // Emit the number of elements in the offloading arrays. 10876 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10877 10878 llvm::Value *OffloadingArgs[] = { 10879 DeviceID, PointerNum, BasePointersArrayArg, 10880 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10881 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10882 OffloadingArgs); 10883 }; 10884 10885 // If we need device pointer privatization, we need to emit the body of the 10886 // region with no privatization in the 'else' branch of the conditional. 10887 // Otherwise, we don't have to do anything. 10888 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10889 PrePostActionTy &) { 10890 if (!Info.CaptureDeviceAddrMap.empty()) { 10891 CodeGen.setAction(NoPrivAction); 10892 CodeGen(CGF); 10893 } 10894 }; 10895 10896 // We don't have to do anything to close the region if the if clause evaluates 10897 // to false. 10898 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10899 10900 if (IfCond) { 10901 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10902 } else { 10903 RegionCodeGenTy RCG(BeginThenGen); 10904 RCG(CGF); 10905 } 10906 10907 // If we don't require privatization of device pointers, we emit the body in 10908 // between the runtime calls. This avoids duplicating the body code. 10909 if (Info.CaptureDeviceAddrMap.empty()) { 10910 CodeGen.setAction(NoPrivAction); 10911 CodeGen(CGF); 10912 } 10913 10914 if (IfCond) { 10915 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10916 } else { 10917 RegionCodeGenTy RCG(EndThenGen); 10918 RCG(CGF); 10919 } 10920 } 10921 10922 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10923 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10924 const Expr *Device) { 10925 if (!CGF.HaveInsertPoint()) 10926 return; 10927 10928 assert((isa<OMPTargetEnterDataDirective>(D) || 10929 isa<OMPTargetExitDataDirective>(D) || 10930 isa<OMPTargetUpdateDirective>(D)) && 10931 "Expecting either target enter, exit data, or update directives."); 10932 10933 CodeGenFunction::OMPTargetDataInfo InputInfo; 10934 llvm::Value *MapTypesArray = nullptr; 10935 // Generate the code for the opening of the data environment. 10936 auto &&ThenGen = [this, &D, Device, &InputInfo, 10937 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10938 // Emit device ID if any. 10939 llvm::Value *DeviceID = nullptr; 10940 if (Device) { 10941 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10942 CGF.Int64Ty, /*isSigned=*/true); 10943 } else { 10944 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10945 } 10946 10947 // Emit the number of elements in the offloading arrays. 
10948 llvm::Constant *PointerNum = 10949 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10950 10951 llvm::Value *OffloadingArgs[] = {DeviceID, 10952 PointerNum, 10953 InputInfo.BasePointersArray.getPointer(), 10954 InputInfo.PointersArray.getPointer(), 10955 InputInfo.SizesArray.getPointer(), 10956 MapTypesArray}; 10957 10958 // Select the right runtime function call for each expected standalone 10959 // directive. 10960 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10961 OpenMPRTLFunction RTLFn; 10962 switch (D.getDirectiveKind()) { 10963 case OMPD_target_enter_data: 10964 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 10965 : OMPRTL__tgt_target_data_begin; 10966 break; 10967 case OMPD_target_exit_data: 10968 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10969 : OMPRTL__tgt_target_data_end; 10970 break; 10971 case OMPD_target_update: 10972 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10973 : OMPRTL__tgt_target_data_update; 10974 break; 10975 case OMPD_parallel: 10976 case OMPD_for: 10977 case OMPD_parallel_for: 10978 case OMPD_parallel_master: 10979 case OMPD_parallel_sections: 10980 case OMPD_for_simd: 10981 case OMPD_parallel_for_simd: 10982 case OMPD_cancel: 10983 case OMPD_cancellation_point: 10984 case OMPD_ordered: 10985 case OMPD_threadprivate: 10986 case OMPD_allocate: 10987 case OMPD_task: 10988 case OMPD_simd: 10989 case OMPD_sections: 10990 case OMPD_section: 10991 case OMPD_single: 10992 case OMPD_master: 10993 case OMPD_critical: 10994 case OMPD_taskyield: 10995 case OMPD_barrier: 10996 case OMPD_taskwait: 10997 case OMPD_taskgroup: 10998 case OMPD_atomic: 10999 case OMPD_flush: 11000 case OMPD_depobj: 11001 case OMPD_scan: 11002 case OMPD_teams: 11003 case OMPD_target_data: 11004 case OMPD_distribute: 11005 case OMPD_distribute_simd: 11006 case OMPD_distribute_parallel_for: 11007 case OMPD_distribute_parallel_for_simd: 11008 case OMPD_teams_distribute: 11009 case OMPD_teams_distribute_simd: 11010 case OMPD_teams_distribute_parallel_for: 11011 case OMPD_teams_distribute_parallel_for_simd: 11012 case OMPD_declare_simd: 11013 case OMPD_declare_variant: 11014 case OMPD_begin_declare_variant: 11015 case OMPD_end_declare_variant: 11016 case OMPD_declare_target: 11017 case OMPD_end_declare_target: 11018 case OMPD_declare_reduction: 11019 case OMPD_declare_mapper: 11020 case OMPD_taskloop: 11021 case OMPD_taskloop_simd: 11022 case OMPD_master_taskloop: 11023 case OMPD_master_taskloop_simd: 11024 case OMPD_parallel_master_taskloop: 11025 case OMPD_parallel_master_taskloop_simd: 11026 case OMPD_target: 11027 case OMPD_target_simd: 11028 case OMPD_target_teams_distribute: 11029 case OMPD_target_teams_distribute_simd: 11030 case OMPD_target_teams_distribute_parallel_for: 11031 case OMPD_target_teams_distribute_parallel_for_simd: 11032 case OMPD_target_teams: 11033 case OMPD_target_parallel: 11034 case OMPD_target_parallel_for: 11035 case OMPD_target_parallel_for_simd: 11036 case OMPD_requires: 11037 case OMPD_unknown: 11038 llvm_unreachable("Unexpected standalone target data directive."); 11039 break; 11040 } 11041 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 11042 }; 11043 11044 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 11045 CodeGenFunction &CGF, PrePostActionTy &) { 11046 // Fill up the arrays with all the mapped variables. 
11047 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 11048 MappableExprsHandler::MapValuesArrayTy Pointers; 11049 MappableExprsHandler::MapValuesArrayTy Sizes; 11050 MappableExprsHandler::MapFlagsArrayTy MapTypes; 11051 11052 // Get map clause information. 11053 MappableExprsHandler MEHandler(D, CGF); 11054 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 11055 11056 TargetDataInfo Info; 11057 // Fill up the arrays and create the arguments. 11058 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 11059 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 11060 Info.PointersArray, Info.SizesArray, 11061 Info.MapTypesArray, Info); 11062 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 11063 InputInfo.BasePointersArray = 11064 Address(Info.BasePointersArray, CGM.getPointerAlign()); 11065 InputInfo.PointersArray = 11066 Address(Info.PointersArray, CGM.getPointerAlign()); 11067 InputInfo.SizesArray = 11068 Address(Info.SizesArray, CGM.getPointerAlign()); 11069 MapTypesArray = Info.MapTypesArray; 11070 if (D.hasClausesOfKind<OMPDependClause>()) 11071 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 11072 else 11073 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 11074 }; 11075 11076 if (IfCond) { 11077 emitIfClause(CGF, IfCond, TargetThenGen, 11078 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 11079 } else { 11080 RegionCodeGenTy ThenRCG(TargetThenGen); 11081 ThenRCG(CGF); 11082 } 11083 } 11084 11085 namespace { 11086 /// Kind of parameter in a function with 'declare simd' directive. 11087 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 11088 /// Attribute set of the parameter. 11089 struct ParamAttrTy { 11090 ParamKindTy Kind = Vector; 11091 llvm::APSInt StrideOrArg; 11092 llvm::APSInt Alignment; 11093 }; 11094 } // namespace 11095 11096 static unsigned evaluateCDTSize(const FunctionDecl *FD, 11097 ArrayRef<ParamAttrTy> ParamAttrs) { 11098 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 11099 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 11100 // of that clause. The VLEN value must be a power of 2. 11101 // Otherwise, the notion of the function's "characteristic data type" (CDT) 11102 // is used to compute the vector length. 11103 // CDT is defined in the following order: 11104 // a) For a non-void function, the CDT is the return type. 11105 // b) If the function has any non-uniform, non-linear parameters, then the 11106 // CDT is the type of the first such parameter. 11107 // c) If the CDT determined by a) or b) above is struct, union, or class 11108 // type which is pass-by-value (except for the type that maps to the 11109 // built-in complex data type), the characteristic data type is int. 11110 // d) If none of the above three cases is applicable, the CDT is int. 11111 // The VLEN is then determined based on the CDT and the size of the vector 11112 // register of the ISA for which the current vector version is generated. The 11113 // VLEN is computed using the formula below: 11114 // VLEN = sizeof(vector_register) / sizeof(CDT), 11115 // where the vector register size is specified in section 3.2.1 "Registers and 11116 // the Stack Frame" of the original AMD64 ABI document.
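// For example (an illustrative sketch, not quoted from the ABI documents):
// for 'double foo(double)' with no simdlen clause, rule a) gives CDT = double,
// so the AVX ('c', 256-bit) not-inbranch variant computed below has
// VLEN = 256 / 64 = 4 and is recorded as the attribute _ZGVcN4v_foo.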
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind) {
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions needed to mangle the names of the vector
// functions generated by the compiler, according to the rules defined
// in the "Vector Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for references marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
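///
/// For example, under '#pragma omp declare simd uniform(n) linear(i)' the
/// parameters n and i do not map to vector (each lane sees a scalar value),
/// while any unannotated parameter defaults to Vector and does map.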
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers.

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
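///
/// For example (illustrative of the switch below): parameters classified as
/// (uniform, linear with step 2, vector) mangle to "ul2v", and a vector
/// parameter carrying aligned(p:16) contributes "va16".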
11331 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11332 SmallString<256> Buffer; 11333 llvm::raw_svector_ostream Out(Buffer); 11334 for (const auto &ParamAttr : ParamAttrs) { 11335 switch (ParamAttr.Kind) { 11336 case LinearWithVarStride: 11337 Out << "ls" << ParamAttr.StrideOrArg; 11338 break; 11339 case Linear: 11340 Out << 'l'; 11341 // Don't print the step value if it is not present or if it is 11342 // equal to 1. 11343 if (ParamAttr.StrideOrArg != 1) 11344 Out << ParamAttr.StrideOrArg; 11345 break; 11346 case Uniform: 11347 Out << 'u'; 11348 break; 11349 case Vector: 11350 Out << 'v'; 11351 break; 11352 } 11353 11354 if (!!ParamAttr.Alignment) 11355 Out << 'a' << ParamAttr.Alignment; 11356 } 11357 11358 return std::string(Out.str()); 11359 } 11360 11361 // Function used to add the attribute. The parameter `VLEN` is 11362 // templated to allow the use of "x" when targeting scalable functions 11363 // for SVE. 11364 template <typename T> 11365 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11366 char ISA, StringRef ParSeq, 11367 StringRef MangledName, bool OutputBecomesInput, 11368 llvm::Function *Fn) { 11369 SmallString<256> Buffer; 11370 llvm::raw_svector_ostream Out(Buffer); 11371 Out << Prefix << ISA << LMask << VLEN; 11372 if (OutputBecomesInput) 11373 Out << "v"; 11374 Out << ParSeq << "_" << MangledName; 11375 Fn->addFnAttr(Out.str()); 11376 } 11377 11378 // Helper function to generate the Advanced SIMD names depending on 11379 // the value of the NDS when simdlen is not present. 11380 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11381 StringRef Prefix, char ISA, 11382 StringRef ParSeq, StringRef MangledName, 11383 bool OutputBecomesInput, 11384 llvm::Function *Fn) { 11385 switch (NDS) { 11386 case 8: 11387 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11388 OutputBecomesInput, Fn); 11389 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11390 OutputBecomesInput, Fn); 11391 break; 11392 case 16: 11393 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11394 OutputBecomesInput, Fn); 11395 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11396 OutputBecomesInput, Fn); 11397 break; 11398 case 32: 11399 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11400 OutputBecomesInput, Fn); 11401 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11402 OutputBecomesInput, Fn); 11403 break; 11404 case 64: 11405 case 128: 11406 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11407 OutputBecomesInput, Fn); 11408 break; 11409 default: 11410 llvm_unreachable("Scalar type is too wide."); 11411 } 11412 } 11413 11414 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 11415 static void emitAArch64DeclareSimdFunction( 11416 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11417 ArrayRef<ParamAttrTy> ParamAttrs, 11418 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11419 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11420 11421 // Get basic data for building the vector signature. 11422 const auto Data = getNDSWDS(FD, ParamAttrs); 11423 const unsigned NDS = std::get<0>(Data); 11424 const unsigned WDS = std::get<1>(Data); 11425 const bool OutputBecomesInput = std::get<2>(Data); 11426 11427 // Check the values provided via `simdlen` by the user. 11428 // 1. 
  //    A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1: an SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out the parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
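      // For example (illustrative), with NDS == 32 and no [not]inbranch
      // clause, addAArch64AdvSIMDNDSNames below emits the 2-lane and 4-lane
      // variants in both unmasked and masked forms:
      //   _ZGVnN2<params>_<name>, _ZGVnN4..., _ZGVnM2..., _ZGVnM4....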
11503 switch (State) { 11504 case OMPDeclareSimdDeclAttr::BS_Undefined: 11505 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11506 OutputBecomesInput, Fn); 11507 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11508 OutputBecomesInput, Fn); 11509 break; 11510 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11511 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11512 OutputBecomesInput, Fn); 11513 break; 11514 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11515 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11516 OutputBecomesInput, Fn); 11517 break; 11518 } 11519 } 11520 } 11521 } 11522 11523 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11524 llvm::Function *Fn) { 11525 ASTContext &C = CGM.getContext(); 11526 FD = FD->getMostRecentDecl(); 11527 // Map params to their positions in function decl. 11528 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11529 if (isa<CXXMethodDecl>(FD)) 11530 ParamPositions.try_emplace(FD, 0); 11531 unsigned ParamPos = ParamPositions.size(); 11532 for (const ParmVarDecl *P : FD->parameters()) { 11533 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11534 ++ParamPos; 11535 } 11536 while (FD) { 11537 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11538 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11539 // Mark uniform parameters. 11540 for (const Expr *E : Attr->uniforms()) { 11541 E = E->IgnoreParenImpCasts(); 11542 unsigned Pos; 11543 if (isa<CXXThisExpr>(E)) { 11544 Pos = ParamPositions[FD]; 11545 } else { 11546 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11547 ->getCanonicalDecl(); 11548 Pos = ParamPositions[PVD]; 11549 } 11550 ParamAttrs[Pos].Kind = Uniform; 11551 } 11552 // Get alignment info. 11553 auto NI = Attr->alignments_begin(); 11554 for (const Expr *E : Attr->aligneds()) { 11555 E = E->IgnoreParenImpCasts(); 11556 unsigned Pos; 11557 QualType ParmTy; 11558 if (isa<CXXThisExpr>(E)) { 11559 Pos = ParamPositions[FD]; 11560 ParmTy = E->getType(); 11561 } else { 11562 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11563 ->getCanonicalDecl(); 11564 Pos = ParamPositions[PVD]; 11565 ParmTy = PVD->getType(); 11566 } 11567 ParamAttrs[Pos].Alignment = 11568 (*NI) 11569 ? (*NI)->EvaluateKnownConstInt(C) 11570 : llvm::APSInt::getUnsigned( 11571 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11572 .getQuantity()); 11573 ++NI; 11574 } 11575 // Mark linear parameters. 11576 auto SI = Attr->steps_begin(); 11577 auto MI = Attr->modifiers_begin(); 11578 for (const Expr *E : Attr->linears()) { 11579 E = E->IgnoreParenImpCasts(); 11580 unsigned Pos; 11581 // Rescaling factor needed to compute the linear parameter 11582 // value in the mangled name. 11583 unsigned PtrRescalingFactor = 1; 11584 if (isa<CXXThisExpr>(E)) { 11585 Pos = ParamPositions[FD]; 11586 } else { 11587 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11588 ->getCanonicalDecl(); 11589 Pos = ParamPositions[PVD]; 11590 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11591 PtrRescalingFactor = CGM.getContext() 11592 .getTypeSizeInChars(P->getPointeeType()) 11593 .getQuantity(); 11594 } 11595 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11596 ParamAttr.Kind = Linear; 11597 // Assuming a stride of 1, for `linear` without modifiers. 
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.Kind = LinearWithVarStride;
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
                    ParamPositions[StridePVD->getCanonicalDecl()]);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (Linear == ParamAttr.Kind)
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
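/// The cleanup is pushed as NormalAndEHCleanup, so __kmpc_doacross_fini is
/// emitted on every path that leaves the region initialized by
/// __kmpc_doacross_init, including exception unwinding.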
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info cast to kmp_int64
    //   kmp_int64 lo;         // lower
    //   kmp_int64 up;         // upper
    //   kmp_int64 st;         // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
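  // For example (illustrative), '#pragma omp for ordered(2)' produces two
  // kmp_dim entries; the loop below stores each loop's iteration count into
  // dims[I].up and 1 into dims[I].st, while dims[I].lo keeps the zero from
  // the null-initialization above.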
11705 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11706 LValue DimsLVal = CGF.MakeAddrLValue( 11707 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11708 // dims.upper = num_iterations; 11709 LValue UpperLVal = CGF.EmitLValueForField( 11710 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11711 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11712 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11713 Int64Ty, NumIterations[I]->getExprLoc()); 11714 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11715 // dims.stride = 1; 11716 LValue StrideLVal = CGF.EmitLValueForField( 11717 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11718 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11719 StrideLVal); 11720 } 11721 11722 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11723 // kmp_int32 num_dims, struct kmp_dim * dims); 11724 llvm::Value *Args[] = { 11725 emitUpdateLocation(CGF, D.getBeginLoc()), 11726 getThreadID(CGF, D.getBeginLoc()), 11727 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11728 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11729 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11730 CGM.VoidPtrTy)}; 11731 11732 llvm::FunctionCallee RTLFn = 11733 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 11734 CGF.EmitRuntimeCall(RTLFn, Args); 11735 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11736 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11737 llvm::FunctionCallee FiniRTLFn = 11738 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 11739 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11740 llvm::makeArrayRef(FiniArgs)); 11741 } 11742 11743 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11744 const OMPDependClause *C) { 11745 QualType Int64Ty = 11746 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11747 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11748 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11749 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11750 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11751 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11752 const Expr *CounterVal = C->getLoopData(I); 11753 assert(CounterVal); 11754 llvm::Value *CntVal = CGF.EmitScalarConversion( 11755 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11756 CounterVal->getExprLoc()); 11757 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11758 /*Volatile=*/false, Int64Ty); 11759 } 11760 llvm::Value *Args[] = { 11761 emitUpdateLocation(CGF, C->getBeginLoc()), 11762 getThreadID(CGF, C->getBeginLoc()), 11763 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11764 llvm::FunctionCallee RTLFn; 11765 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11766 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 11767 } else { 11768 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11769 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 11770 } 11771 CGF.EmitRuntimeCall(RTLFn, Args); 11772 } 11773 11774 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 11775 llvm::FunctionCallee Callee, 11776 ArrayRef<llvm::Value *> Args) const { 11777 assert(Loc.isValid() && "Outlined function call location must be valid."); 11778 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11779 11780 if (auto *Fn = 
          dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

namespace {
/// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert it to a pointer type, if required.
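  // For example (illustrative, assuming the usual <omp.h> definitions), in
  //   int a;
  //   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
  // the predefined allocator is an integer constant on the host, so it is
  // converted to a pointer with CreateIntToPtr before the __kmpc_alloc call.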
11865 if (Allocator->getType()->isIntegerTy()) 11866 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11867 else if (Allocator->getType()->isPointerTy()) 11868 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11869 CGM.VoidPtrTy); 11870 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11871 11872 llvm::Value *Addr = 11873 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11874 getName({CVD->getName(), ".void.addr"})); 11875 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11876 Allocator}; 11877 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11878 11879 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11880 llvm::makeArrayRef(FiniArgs)); 11881 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11882 Addr, 11883 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11884 getName({CVD->getName(), ".addr"})); 11885 return Address(Addr, Align); 11886 } 11887 11888 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11889 CodeGenModule &CGM, const OMPLoopDirective &S) 11890 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11891 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11892 if (!NeedToPush) 11893 return; 11894 NontemporalDeclsSet &DS = 11895 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11896 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11897 for (const Stmt *Ref : C->private_refs()) { 11898 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11899 const ValueDecl *VD; 11900 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11901 VD = DRE->getDecl(); 11902 } else { 11903 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11904 assert((ME->isImplicitCXXThis() || 11905 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11906 "Expected member of current class."); 11907 VD = ME->getMemberDecl(); 11908 } 11909 DS.insert(VD); 11910 } 11911 } 11912 } 11913 11914 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11915 if (!NeedToPush) 11916 return; 11917 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11918 } 11919 11920 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11921 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11922 11923 return llvm::any_of( 11924 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11925 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11926 } 11927 11928 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11929 const OMPExecutableDirective &S, 11930 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11931 const { 11932 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11933 // Vars in target/task regions must be excluded completely. 11934 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11935 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11936 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11937 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11938 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11939 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11940 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11941 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11942 } 11943 } 11944 // Exclude vars in private clauses. 
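  // For example, if an enclosing directive tracks 'lastprivate(conditional:
  // x)' and this construct privatizes x, stores to the private x in here
  // must not update the tracked value, so x is recorded below and pushed as
  // a disabled entry.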
11945 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11946 for (const Expr *Ref : C->varlists()) { 11947 if (!Ref->getType()->isScalarType()) 11948 continue; 11949 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11950 if (!DRE) 11951 continue; 11952 NeedToCheckForLPCs.insert(DRE->getDecl()); 11953 } 11954 } 11955 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11956 for (const Expr *Ref : C->varlists()) { 11957 if (!Ref->getType()->isScalarType()) 11958 continue; 11959 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11960 if (!DRE) 11961 continue; 11962 NeedToCheckForLPCs.insert(DRE->getDecl()); 11963 } 11964 } 11965 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11966 for (const Expr *Ref : C->varlists()) { 11967 if (!Ref->getType()->isScalarType()) 11968 continue; 11969 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11970 if (!DRE) 11971 continue; 11972 NeedToCheckForLPCs.insert(DRE->getDecl()); 11973 } 11974 } 11975 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11976 for (const Expr *Ref : C->varlists()) { 11977 if (!Ref->getType()->isScalarType()) 11978 continue; 11979 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11980 if (!DRE) 11981 continue; 11982 NeedToCheckForLPCs.insert(DRE->getDecl()); 11983 } 11984 } 11985 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11986 for (const Expr *Ref : C->varlists()) { 11987 if (!Ref->getType()->isScalarType()) 11988 continue; 11989 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11990 if (!DRE) 11991 continue; 11992 NeedToCheckForLPCs.insert(DRE->getDecl()); 11993 } 11994 } 11995 for (const Decl *VD : NeedToCheckForLPCs) { 11996 for (const LastprivateConditionalData &Data : 11997 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11998 if (Data.DeclToUniqueName.count(VD) > 0) { 11999 if (!Data.Disabled) 12000 NeedToAddForLPCsAsDisabled.insert(VD); 12001 break; 12002 } 12003 } 12004 } 12005 } 12006 12007 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 12008 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 12009 : CGM(CGF.CGM), 12010 Action((CGM.getLangOpts().OpenMP >= 50 && 12011 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 12012 [](const OMPLastprivateClause *C) { 12013 return C->getKind() == 12014 OMPC_LASTPRIVATE_conditional; 12015 })) 12016 ? 
                  ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
NewType = C.getRecordType(RD); 12098 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 12099 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 12100 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 12101 } else { 12102 NewType = std::get<0>(VI->getSecond()); 12103 VDField = std::get<1>(VI->getSecond()); 12104 FiredField = std::get<2>(VI->getSecond()); 12105 BaseLVal = std::get<3>(VI->getSecond()); 12106 } 12107 LValue FiredLVal = 12108 CGF.EmitLValueForField(BaseLVal, FiredField); 12109 CGF.EmitStoreOfScalar( 12110 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 12111 FiredLVal); 12112 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 12113 } 12114 12115 namespace { 12116 /// Checks if the lastprivate conditional variable is referenced in LHS. 12117 class LastprivateConditionalRefChecker final 12118 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 12119 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 12120 const Expr *FoundE = nullptr; 12121 const Decl *FoundD = nullptr; 12122 StringRef UniqueDeclName; 12123 LValue IVLVal; 12124 llvm::Function *FoundFn = nullptr; 12125 SourceLocation Loc; 12126 12127 public: 12128 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12129 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12130 llvm::reverse(LPM)) { 12131 auto It = D.DeclToUniqueName.find(E->getDecl()); 12132 if (It == D.DeclToUniqueName.end()) 12133 continue; 12134 if (D.Disabled) 12135 return false; 12136 FoundE = E; 12137 FoundD = E->getDecl()->getCanonicalDecl(); 12138 UniqueDeclName = It->second; 12139 IVLVal = D.IVLVal; 12140 FoundFn = D.Fn; 12141 break; 12142 } 12143 return FoundE == E; 12144 } 12145 bool VisitMemberExpr(const MemberExpr *E) { 12146 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12147 return false; 12148 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12149 llvm::reverse(LPM)) { 12150 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12151 if (It == D.DeclToUniqueName.end()) 12152 continue; 12153 if (D.Disabled) 12154 return false; 12155 FoundE = E; 12156 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12157 UniqueDeclName = It->second; 12158 IVLVal = D.IVLVal; 12159 FoundFn = D.Fn; 12160 break; 12161 } 12162 return FoundE == E; 12163 } 12164 bool VisitStmt(const Stmt *S) { 12165 for (const Stmt *Child : S->children()) { 12166 if (!Child) 12167 continue; 12168 if (const auto *E = dyn_cast<Expr>(Child)) 12169 if (!E->isGLValue()) 12170 continue; 12171 if (Visit(Child)) 12172 return true; 12173 } 12174 return false; 12175 } 12176 explicit LastprivateConditionalRefChecker( 12177 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12178 : LPM(LPM) {} 12179 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12180 getFoundData() const { 12181 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12182 } 12183 }; 12184 } // namespace 12185 12186 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12187 LValue IVLVal, 12188 StringRef UniqueDeclName, 12189 LValue LVal, 12190 SourceLocation Loc) { 12191 // Last updated loop counter for the lastprivate conditional var. 
12192 // int<xx> last_iv = 0; 12193 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12194 llvm::Constant *LastIV = 12195 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12196 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12197 IVLVal.getAlignment().getAsAlign()); 12198 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12199 12200 // Last value of the lastprivate conditional. 12201 // decltype(priv_a) last_a; 12202 llvm::Constant *Last = getOrCreateInternalVariable( 12203 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12204 cast<llvm::GlobalVariable>(Last)->setAlignment( 12205 LVal.getAlignment().getAsAlign()); 12206 LValue LastLVal = 12207 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12208 12209 // Global loop counter. Required to handle inner parallel-for regions. 12210 // iv 12211 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12212 12213 // #pragma omp critical(a) 12214 // if (last_iv <= iv) { 12215 // last_iv = iv; 12216 // last_a = priv_a; 12217 // } 12218 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12219 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12220 Action.Enter(CGF); 12221 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12222 // (last_iv <= iv) ? Check if the variable is updated and store new 12223 // value in global var. 12224 llvm::Value *CmpRes; 12225 if (IVLVal.getType()->isSignedIntegerType()) { 12226 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12227 } else { 12228 assert(IVLVal.getType()->isUnsignedIntegerType() && 12229 "Loop iteration variable must be integer."); 12230 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12231 } 12232 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12233 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12234 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12235 // { 12236 CGF.EmitBlock(ThenBB); 12237 12238 // last_iv = iv; 12239 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12240 12241 // last_a = priv_a; 12242 switch (CGF.getEvaluationKind(LVal.getType())) { 12243 case TEK_Scalar: { 12244 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12245 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12246 break; 12247 } 12248 case TEK_Complex: { 12249 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12250 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 12251 break; 12252 } 12253 case TEK_Aggregate: 12254 llvm_unreachable( 12255 "Aggregates are not supported in lastprivate conditional."); 12256 } 12257 // } 12258 CGF.EmitBranch(ExitBB); 12259 // There is no need to emit line number for unconditional branch. 12260 (void)ApplyDebugLocation::CreateEmpty(CGF); 12261 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12262 }; 12263 12264 if (CGM.getLangOpts().OpenMPSimd) { 12265 // Do not emit as a critical region as no parallel region could be emitted. 
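    // (With -fopenmp-simd only 'simd' lowering is performed, so there are no
    // concurrent threads and the unsynchronized update is assumed safe.)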
12266 RegionCodeGenTy ThenRCG(CodeGen); 12267 ThenRCG(CGF); 12268 } else { 12269 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12270 } 12271 } 12272 12273 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12274 const Expr *LHS) { 12275 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12276 return; 12277 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12278 if (!Checker.Visit(LHS)) 12279 return; 12280 const Expr *FoundE; 12281 const Decl *FoundD; 12282 StringRef UniqueDeclName; 12283 LValue IVLVal; 12284 llvm::Function *FoundFn; 12285 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12286 Checker.getFoundData(); 12287 if (FoundFn != CGF.CurFn) { 12288 // Special codegen for inner parallel regions. 12289 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12290 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12291 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12292 "Lastprivate conditional is not found in outer region."); 12293 QualType StructTy = std::get<0>(It->getSecond()); 12294 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12295 LValue PrivLVal = CGF.EmitLValue(FoundE); 12296 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12297 PrivLVal.getAddress(CGF), 12298 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12299 LValue BaseLVal = 12300 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12301 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12302 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12303 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12304 FiredLVal, llvm::AtomicOrdering::Unordered, 12305 /*IsVolatile=*/true, /*isInit=*/false); 12306 return; 12307 } 12308 12309 // Private address of the lastprivate conditional in the current context. 
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0)
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
12377 if (!GV) 12378 return; 12379 LValue LPLVal = CGF.MakeAddrLValue( 12380 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12381 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12382 CGF.EmitStoreOfScalar(Res, PrivLVal); 12383 } 12384 12385 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12386 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12387 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12388 llvm_unreachable("Not supported in SIMD-only mode"); 12389 } 12390 12391 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12392 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12393 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12394 llvm_unreachable("Not supported in SIMD-only mode"); 12395 } 12396 12397 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12398 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12399 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12400 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12401 bool Tied, unsigned &NumberOfParts) { 12402 llvm_unreachable("Not supported in SIMD-only mode"); 12403 } 12404 12405 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12406 SourceLocation Loc, 12407 llvm::Function *OutlinedFn, 12408 ArrayRef<llvm::Value *> CapturedVars, 12409 const Expr *IfCond) { 12410 llvm_unreachable("Not supported in SIMD-only mode"); 12411 } 12412 12413 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12414 CodeGenFunction &CGF, StringRef CriticalName, 12415 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12416 const Expr *Hint) { 12417 llvm_unreachable("Not supported in SIMD-only mode"); 12418 } 12419 12420 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12421 const RegionCodeGenTy &MasterOpGen, 12422 SourceLocation Loc) { 12423 llvm_unreachable("Not supported in SIMD-only mode"); 12424 } 12425 12426 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12427 SourceLocation Loc) { 12428 llvm_unreachable("Not supported in SIMD-only mode"); 12429 } 12430 12431 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12432 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12433 SourceLocation Loc) { 12434 llvm_unreachable("Not supported in SIMD-only mode"); 12435 } 12436 12437 void CGOpenMPSIMDRuntime::emitSingleRegion( 12438 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12439 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12440 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12441 ArrayRef<const Expr *> AssignmentOps) { 12442 llvm_unreachable("Not supported in SIMD-only mode"); 12443 } 12444 12445 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12446 const RegionCodeGenTy &OrderedOpGen, 12447 SourceLocation Loc, 12448 bool IsThreads) { 12449 llvm_unreachable("Not supported in SIMD-only mode"); 12450 } 12451 12452 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12453 SourceLocation Loc, 12454 OpenMPDirectiveKind Kind, 12455 bool EmitChecks, 12456 bool ForceSimpleCall) { 12457 llvm_unreachable("Not supported in SIMD-only mode"); 12458 } 12459 12460 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12461 CodeGenFunction &CGF, SourceLocation Loc, 12462 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12463 bool Ordered, const DispatchRTInput &DispatchValues) { 12464 llvm_unreachable("Not supported in SIMD-only mode"); 12465 } 12466 12467 
void CGOpenMPSIMDRuntime::emitForStaticInit( 12468 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12469 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12470 llvm_unreachable("Not supported in SIMD-only mode"); 12471 } 12472 12473 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12474 CodeGenFunction &CGF, SourceLocation Loc, 12475 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 12476 llvm_unreachable("Not supported in SIMD-only mode"); 12477 } 12478 12479 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12480 SourceLocation Loc, 12481 unsigned IVSize, 12482 bool IVSigned) { 12483 llvm_unreachable("Not supported in SIMD-only mode"); 12484 } 12485 12486 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12487 SourceLocation Loc, 12488 OpenMPDirectiveKind DKind) { 12489 llvm_unreachable("Not supported in SIMD-only mode"); 12490 } 12491 12492 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12493 SourceLocation Loc, 12494 unsigned IVSize, bool IVSigned, 12495 Address IL, Address LB, 12496 Address UB, Address ST) { 12497 llvm_unreachable("Not supported in SIMD-only mode"); 12498 } 12499 12500 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12501 llvm::Value *NumThreads, 12502 SourceLocation Loc) { 12503 llvm_unreachable("Not supported in SIMD-only mode"); 12504 } 12505 12506 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12507 ProcBindKind ProcBind, 12508 SourceLocation Loc) { 12509 llvm_unreachable("Not supported in SIMD-only mode"); 12510 } 12511 12512 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12513 const VarDecl *VD, 12514 Address VDAddr, 12515 SourceLocation Loc) { 12516 llvm_unreachable("Not supported in SIMD-only mode"); 12517 } 12518 12519 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12520 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12521 CodeGenFunction *CGF) { 12522 llvm_unreachable("Not supported in SIMD-only mode"); 12523 } 12524 12525 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12526 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12527 llvm_unreachable("Not supported in SIMD-only mode"); 12528 } 12529 12530 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12531 ArrayRef<const Expr *> Vars, 12532 SourceLocation Loc, 12533 llvm::AtomicOrdering AO) { 12534 llvm_unreachable("Not supported in SIMD-only mode"); 12535 } 12536 12537 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12538 const OMPExecutableDirective &D, 12539 llvm::Function *TaskFunction, 12540 QualType SharedsTy, Address Shareds, 12541 const Expr *IfCond, 12542 const OMPTaskDataTy &Data) { 12543 llvm_unreachable("Not supported in SIMD-only mode"); 12544 } 12545 12546 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12547 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12548 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12549 const Expr *IfCond, const OMPTaskDataTy &Data) { 12550 llvm_unreachable("Not supported in SIMD-only mode"); 12551 } 12552 12553 void CGOpenMPSIMDRuntime::emitReduction( 12554 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12555 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12556 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12557 assert(Options.SimpleReduction && "Only simple 
reduction is expected."); 12558 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12559 ReductionOps, Options); 12560 } 12561 12562 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12563 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12564 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12565 llvm_unreachable("Not supported in SIMD-only mode"); 12566 } 12567 12568 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12569 SourceLocation Loc, 12570 bool IsWorksharingReduction) { 12571 llvm_unreachable("Not supported in SIMD-only mode"); 12572 } 12573 12574 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12575 SourceLocation Loc, 12576 ReductionCodeGen &RCG, 12577 unsigned N) { 12578 llvm_unreachable("Not supported in SIMD-only mode"); 12579 } 12580 12581 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12582 SourceLocation Loc, 12583 llvm::Value *ReductionsPtr, 12584 LValue SharedLVal) { 12585 llvm_unreachable("Not supported in SIMD-only mode"); 12586 } 12587 12588 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12589 SourceLocation Loc) { 12590 llvm_unreachable("Not supported in SIMD-only mode"); 12591 } 12592 12593 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12594 CodeGenFunction &CGF, SourceLocation Loc, 12595 OpenMPDirectiveKind CancelRegion) { 12596 llvm_unreachable("Not supported in SIMD-only mode"); 12597 } 12598 12599 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12600 SourceLocation Loc, const Expr *IfCond, 12601 OpenMPDirectiveKind CancelRegion) { 12602 llvm_unreachable("Not supported in SIMD-only mode"); 12603 } 12604 12605 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12606 const OMPExecutableDirective &D, StringRef ParentName, 12607 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12608 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12609 llvm_unreachable("Not supported in SIMD-only mode"); 12610 } 12611 12612 void CGOpenMPSIMDRuntime::emitTargetCall( 12613 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12614 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12615 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12616 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12617 const OMPLoopDirective &D)> 12618 SizeEmitter) { 12619 llvm_unreachable("Not supported in SIMD-only mode"); 12620 } 12621 12622 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12623 llvm_unreachable("Not supported in SIMD-only mode"); 12624 } 12625 12626 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12627 llvm_unreachable("Not supported in SIMD-only mode"); 12628 } 12629 12630 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12631 return false; 12632 } 12633 12634 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12635 const OMPExecutableDirective &D, 12636 SourceLocation Loc, 12637 llvm::Function *OutlinedFn, 12638 ArrayRef<llvm::Value *> CapturedVars) { 12639 llvm_unreachable("Not supported in SIMD-only mode"); 12640 } 12641 12642 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12643 const Expr *NumTeams, 12644 const Expr *ThreadLimit, 12645 SourceLocation Loc) { 12646 llvm_unreachable("Not supported in SIMD-only mode"); 12647 } 12648 12649 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12650 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 
12651 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12652 llvm_unreachable("Not supported in SIMD-only mode"); 12653 } 12654 12655 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12656 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12657 const Expr *Device) { 12658 llvm_unreachable("Not supported in SIMD-only mode"); 12659 } 12660 12661 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12662 const OMPLoopDirective &D, 12663 ArrayRef<Expr *> NumIterations) { 12664 llvm_unreachable("Not supported in SIMD-only mode"); 12665 } 12666 12667 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12668 const OMPDependClause *C) { 12669 llvm_unreachable("Not supported in SIMD-only mode"); 12670 } 12671 12672 const VarDecl * 12673 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12674 const VarDecl *NativeParam) const { 12675 llvm_unreachable("Not supported in SIMD-only mode"); 12676 } 12677 12678 Address 12679 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12680 const VarDecl *NativeParam, 12681 const VarDecl *TargetParam) const { 12682 llvm_unreachable("Not supported in SIMD-only mode"); 12683 } 12684