1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 33 #include "llvm/IR/Constants.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/GlobalValue.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/Support/AtomicOrdering.h" 38 #include "llvm/Support/Format.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <cassert> 41 #include <numeric> 42 43 using namespace clang; 44 using namespace CodeGen; 45 using namespace llvm::omp; 46 47 namespace { 48 /// Base class for handling code generation inside OpenMP regions. 49 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 50 public: 51 /// Kinds of OpenMP regions used in codegen. 
52 enum CGOpenMPRegionKind { 53 /// Region with outlined function for standalone 'parallel' 54 /// directive. 55 ParallelOutlinedRegion, 56 /// Region with outlined function for standalone 'task' directive. 57 TaskOutlinedRegion, 58 /// Region for constructs that do not require function outlining, 59 /// like 'for', 'sections', 'atomic' etc. directives. 60 InlinedRegion, 61 /// Region with outlined function for standalone 'target' directive. 62 TargetRegion, 63 }; 64 65 CGOpenMPRegionInfo(const CapturedStmt &CS, 66 const CGOpenMPRegionKind RegionKind, 67 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 68 bool HasCancel) 69 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 70 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 71 72 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 73 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 74 bool HasCancel) 75 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 76 Kind(Kind), HasCancel(HasCancel) {} 77 78 /// Get a variable or parameter for storing global thread id 79 /// inside OpenMP construct. 80 virtual const VarDecl *getThreadIDVariable() const = 0; 81 82 /// Emit the captured statement body. 83 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 84 85 /// Get an LValue for the current ThreadID variable. 86 /// \return LValue for thread id variable. This LValue always has type int32*. 
87 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 88 89 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 90 91 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 92 93 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 94 95 bool hasCancel() const { return HasCancel; } 96 97 static bool classof(const CGCapturedStmtInfo *Info) { 98 return Info->getKind() == CR_OpenMP; 99 } 100 101 ~CGOpenMPRegionInfo() override = default; 102 103 protected: 104 CGOpenMPRegionKind RegionKind; 105 RegionCodeGenTy CodeGen; 106 OpenMPDirectiveKind Kind; 107 bool HasCancel; 108 }; 109 110 /// API for captured statement code generation in OpenMP constructs. 111 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 112 public: 113 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 114 const RegionCodeGenTy &CodeGen, 115 OpenMPDirectiveKind Kind, bool HasCancel, 116 StringRef HelperName) 117 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 118 HasCancel), 119 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 120 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 121 } 122 123 /// Get a variable or parameter for storing global thread id 124 /// inside OpenMP construct. 125 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 126 127 /// Get the name of the capture helper. 128 StringRef getHelperName() const override { return HelperName; } 129 130 static bool classof(const CGCapturedStmtInfo *Info) { 131 return CGOpenMPRegionInfo::classof(Info) && 132 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 133 ParallelOutlinedRegion; 134 } 135 136 private: 137 /// A variable or parameter storing global thread id for OpenMP 138 /// constructs. 139 const VarDecl *ThreadIDVar; 140 StringRef HelperName; 141 }; 142 143 /// API for captured statement code generation in OpenMP constructs. 
144 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 145 public: 146 class UntiedTaskActionTy final : public PrePostActionTy { 147 bool Untied; 148 const VarDecl *PartIDVar; 149 const RegionCodeGenTy UntiedCodeGen; 150 llvm::SwitchInst *UntiedSwitch = nullptr; 151 152 public: 153 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 154 const RegionCodeGenTy &UntiedCodeGen) 155 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 156 void Enter(CodeGenFunction &CGF) override { 157 if (Untied) { 158 // Emit task switching point. 159 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 160 CGF.GetAddrOfLocalVar(PartIDVar), 161 PartIDVar->getType()->castAs<PointerType>()); 162 llvm::Value *Res = 163 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 164 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 165 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 166 CGF.EmitBlock(DoneBB); 167 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 168 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 169 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 170 CGF.Builder.GetInsertBlock()); 171 emitUntiedSwitch(CGF); 172 } 173 } 174 void emitUntiedSwitch(CodeGenFunction &CGF) const { 175 if (Untied) { 176 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 177 CGF.GetAddrOfLocalVar(PartIDVar), 178 PartIDVar->getType()->castAs<PointerType>()); 179 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 180 PartIdLVal); 181 UntiedCodeGen(CGF); 182 CodeGenFunction::JumpDest CurPoint = 183 CGF.getJumpDestInCurrentScope(".untied.next."); 184 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 185 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 186 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 187 CGF.Builder.GetInsertBlock()); 188 CGF.EmitBranchThroughCleanup(CurPoint); 189 CGF.EmitBlock(CurPoint.getBlock()); 190 } 191 } 192 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 193 }; 194 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 195 const VarDecl *ThreadIDVar, 196 const RegionCodeGenTy &CodeGen, 197 OpenMPDirectiveKind Kind, bool HasCancel, 198 const UntiedTaskActionTy &Action) 199 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 200 ThreadIDVar(ThreadIDVar), Action(Action) { 201 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 202 } 203 204 /// Get a variable or parameter for storing global thread id 205 /// inside OpenMP construct. 206 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 207 208 /// Get an LValue for the current ThreadID variable. 209 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 210 211 /// Get the name of the capture helper. 212 StringRef getHelperName() const override { return ".omp_outlined."; } 213 214 void emitUntiedSwitch(CodeGenFunction &CGF) override { 215 Action.emitUntiedSwitch(CGF); 216 } 217 218 static bool classof(const CGCapturedStmtInfo *Info) { 219 return CGOpenMPRegionInfo::classof(Info) && 220 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 221 TaskOutlinedRegion; 222 } 223 224 private: 225 /// A variable or parameter storing global thread id for OpenMP 226 /// constructs. 227 const VarDecl *ThreadIDVar; 228 /// Action for emitting code for untied tasks. 229 const UntiedTaskActionTy &Action; 230 }; 231 232 /// API for inlined captured statement code generation in OpenMP 233 /// constructs. 234 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 235 public: 236 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 237 const RegionCodeGenTy &CodeGen, 238 OpenMPDirectiveKind Kind, bool HasCancel) 239 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 240 OldCSI(OldCSI), 241 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 242 243 // Retrieve the value of the context parameter. 
244 llvm::Value *getContextValue() const override { 245 if (OuterRegionInfo) 246 return OuterRegionInfo->getContextValue(); 247 llvm_unreachable("No context value for inlined OpenMP region"); 248 } 249 250 void setContextValue(llvm::Value *V) override { 251 if (OuterRegionInfo) { 252 OuterRegionInfo->setContextValue(V); 253 return; 254 } 255 llvm_unreachable("No context value for inlined OpenMP region"); 256 } 257 258 /// Lookup the captured field decl for a variable. 259 const FieldDecl *lookup(const VarDecl *VD) const override { 260 if (OuterRegionInfo) 261 return OuterRegionInfo->lookup(VD); 262 // If there is no outer outlined region,no need to lookup in a list of 263 // captured variables, we can use the original one. 264 return nullptr; 265 } 266 267 FieldDecl *getThisFieldDecl() const override { 268 if (OuterRegionInfo) 269 return OuterRegionInfo->getThisFieldDecl(); 270 return nullptr; 271 } 272 273 /// Get a variable or parameter for storing global thread id 274 /// inside OpenMP construct. 275 const VarDecl *getThreadIDVariable() const override { 276 if (OuterRegionInfo) 277 return OuterRegionInfo->getThreadIDVariable(); 278 return nullptr; 279 } 280 281 /// Get an LValue for the current ThreadID variable. 282 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 285 llvm_unreachable("No LValue for inlined OpenMP construct"); 286 } 287 288 /// Get the name of the capture helper. 
289 StringRef getHelperName() const override { 290 if (auto *OuterRegionInfo = getOldCSI()) 291 return OuterRegionInfo->getHelperName(); 292 llvm_unreachable("No helper name for inlined OpenMP construct"); 293 } 294 295 void emitUntiedSwitch(CodeGenFunction &CGF) override { 296 if (OuterRegionInfo) 297 OuterRegionInfo->emitUntiedSwitch(CGF); 298 } 299 300 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 301 302 static bool classof(const CGCapturedStmtInfo *Info) { 303 return CGOpenMPRegionInfo::classof(Info) && 304 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 305 } 306 307 ~CGOpenMPInlinedRegionInfo() override = default; 308 309 private: 310 /// CodeGen info about outer OpenMP region. 311 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 312 CGOpenMPRegionInfo *OuterRegionInfo; 313 }; 314 315 /// API for captured statement code generation in OpenMP target 316 /// constructs. For this captures, implicit parameters are used instead of the 317 /// captured fields. The name of the target region has to be unique in a given 318 /// application so it is provided by the client, because only the client has 319 /// the information to generate that. 320 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 321 public: 322 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 323 const RegionCodeGenTy &CodeGen, StringRef HelperName) 324 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 325 /*HasCancel=*/false), 326 HelperName(HelperName) {} 327 328 /// This is unused for target regions because each starts executing 329 /// with a single thread. 330 const VarDecl *getThreadIDVariable() const override { return nullptr; } 331 332 /// Get the name of the capture helper. 
333 StringRef getHelperName() const override { return HelperName; } 334 335 static bool classof(const CGCapturedStmtInfo *Info) { 336 return CGOpenMPRegionInfo::classof(Info) && 337 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 338 } 339 340 private: 341 StringRef HelperName; 342 }; 343 344 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 345 llvm_unreachable("No codegen for expressions"); 346 } 347 /// API for generation of expressions captured in a innermost OpenMP 348 /// region. 349 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 350 public: 351 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 352 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 353 OMPD_unknown, 354 /*HasCancel=*/false), 355 PrivScope(CGF) { 356 // Make sure the globals captured in the provided statement are local by 357 // using the privatization logic. We assume the same variable is not 358 // captured more than once. 359 for (const auto &C : CS.captures()) { 360 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 361 continue; 362 363 const VarDecl *VD = C.getCapturedVar(); 364 if (VD->isLocalVarDeclOrParm()) 365 continue; 366 367 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 368 /*RefersToEnclosingVariableOrCapture=*/false, 369 VD->getType().getNonReferenceType(), VK_LValue, 370 C.getLocation()); 371 PrivScope.addPrivate( 372 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 373 } 374 (void)PrivScope.Privatize(); 375 } 376 377 /// Lookup the captured field decl for a variable. 378 const FieldDecl *lookup(const VarDecl *VD) const override { 379 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 380 return FD; 381 return nullptr; 382 } 383 384 /// Emit the captured statement body. 
385 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 386 llvm_unreachable("No body for expressions"); 387 } 388 389 /// Get a variable or parameter for storing global thread id 390 /// inside OpenMP construct. 391 const VarDecl *getThreadIDVariable() const override { 392 llvm_unreachable("No thread id for expressions"); 393 } 394 395 /// Get the name of the capture helper. 396 StringRef getHelperName() const override { 397 llvm_unreachable("No helper name for expressions"); 398 } 399 400 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 401 402 private: 403 /// Private scope to capture global variables. 404 CodeGenFunction::OMPPrivateScope PrivScope; 405 }; 406 407 /// RAII for emitting code of OpenMP constructs. 408 class InlinedOpenMPRegionRAII { 409 CodeGenFunction &CGF; 410 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 411 FieldDecl *LambdaThisCaptureField = nullptr; 412 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 413 414 public: 415 /// Constructs region for combined constructs. 416 /// \param CodeGen Code generation sequence for combined directives. Includes 417 /// a list of functions used for code generation of implicitly inlined 418 /// regions. 419 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 420 OpenMPDirectiveKind Kind, bool HasCancel) 421 : CGF(CGF) { 422 // Start emission for the construct. 423 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 424 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 427 CGF.LambdaThisCaptureField = nullptr; 428 BlockInfo = CGF.BlockInfo; 429 CGF.BlockInfo = nullptr; 430 } 431 432 ~InlinedOpenMPRegionRAII() { 433 // Restore original CapturedStmtInfo only if we're done with code emission. 
434 auto *OldCSI = 435 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 436 delete CGF.CapturedStmtInfo; 437 CGF.CapturedStmtInfo = OldCSI; 438 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 439 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 440 CGF.BlockInfo = BlockInfo; 441 } 442 }; 443 444 /// Values for bit flags used in the ident_t to describe the fields. 445 /// All enumeric elements are named and described in accordance with the code 446 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 447 enum OpenMPLocationFlags : unsigned { 448 /// Use trampoline for internal microtask. 449 OMP_IDENT_IMD = 0x01, 450 /// Use c-style ident structure. 451 OMP_IDENT_KMPC = 0x02, 452 /// Atomic reduction option for kmpc_reduce. 453 OMP_ATOMIC_REDUCE = 0x10, 454 /// Explicit 'barrier' directive. 455 OMP_IDENT_BARRIER_EXPL = 0x20, 456 /// Implicit barrier in code. 457 OMP_IDENT_BARRIER_IMPL = 0x40, 458 /// Implicit barrier in 'for' directive. 459 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 460 /// Implicit barrier in 'sections' directive. 461 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 462 /// Implicit barrier in 'single' directive. 463 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 464 /// Call of __kmp_for_static_init for static loop. 465 OMP_IDENT_WORK_LOOP = 0x200, 466 /// Call of __kmp_for_static_init for sections. 467 OMP_IDENT_WORK_SECTIONS = 0x400, 468 /// Call of __kmp_for_static_init for distribute. 469 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 470 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 471 }; 472 473 namespace { 474 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 475 /// Values for bit flags for marking which requires clauses have been used. 476 enum OpenMPOffloadingRequiresDirFlags : int64_t { 477 /// flag undefined. 478 OMP_REQ_UNDEFINED = 0x000, 479 /// no requires clause present. 480 OMP_REQ_NONE = 0x001, 481 /// reverse_offload clause. 
482 OMP_REQ_REVERSE_OFFLOAD = 0x002, 483 /// unified_address clause. 484 OMP_REQ_UNIFIED_ADDRESS = 0x004, 485 /// unified_shared_memory clause. 486 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 487 /// dynamic_allocators clause. 488 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 489 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 490 }; 491 492 enum OpenMPOffloadingReservedDeviceIDs { 493 /// Device ID if the device was not defined, runtime should get it 494 /// from environment variables in the spec. 495 OMP_DEVICEID_UNDEF = -1, 496 }; 497 } // anonymous namespace 498 499 /// Describes ident structure that describes a source location. 500 /// All descriptions are taken from 501 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 502 /// Original structure: 503 /// typedef struct ident { 504 /// kmp_int32 reserved_1; /**< might be used in Fortran; 505 /// see above */ 506 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 507 /// KMP_IDENT_KMPC identifies this union 508 /// member */ 509 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 510 /// see above */ 511 ///#if USE_ITT_BUILD 512 /// /* but currently used for storing 513 /// region-specific ITT */ 514 /// /* contextual information. */ 515 ///#endif /* USE_ITT_BUILD */ 516 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 517 /// C++ */ 518 /// char const *psource; /**< String describing the source location. 519 /// The string is composed of semi-colon separated 520 // fields which describe the source file, 521 /// the function and a pair of line numbers that 522 /// delimit the construct. 523 /// */ 524 /// } ident_t; 525 enum IdentFieldIndex { 526 /// might be used in Fortran 527 IdentField_Reserved_1, 528 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
529 IdentField_Flags, 530 /// Not really used in Fortran any more 531 IdentField_Reserved_2, 532 /// Source[4] in Fortran, do not use for C++ 533 IdentField_Reserved_3, 534 /// String describing the source location. The string is composed of 535 /// semi-colon separated fields which describe the source file, the function 536 /// and a pair of line numbers that delimit the construct. 537 IdentField_PSource 538 }; 539 540 /// Schedule types for 'omp for' loops (these enumerators are taken from 541 /// the enum sched_type in kmp.h). 542 enum OpenMPSchedType { 543 /// Lower bound for default (unordered) versions. 544 OMP_sch_lower = 32, 545 OMP_sch_static_chunked = 33, 546 OMP_sch_static = 34, 547 OMP_sch_dynamic_chunked = 35, 548 OMP_sch_guided_chunked = 36, 549 OMP_sch_runtime = 37, 550 OMP_sch_auto = 38, 551 /// static with chunk adjustment (e.g., simd) 552 OMP_sch_static_balanced_chunked = 45, 553 /// Lower bound for 'ordered' versions. 554 OMP_ord_lower = 64, 555 OMP_ord_static_chunked = 65, 556 OMP_ord_static = 66, 557 OMP_ord_dynamic_chunked = 67, 558 OMP_ord_guided_chunked = 68, 559 OMP_ord_runtime = 69, 560 OMP_ord_auto = 70, 561 OMP_sch_default = OMP_sch_static, 562 /// dist_schedule types 563 OMP_dist_sch_static_chunked = 91, 564 OMP_dist_sch_static = 92, 565 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 566 /// Set if the monotonic schedule modifier was present. 567 OMP_sch_modifier_monotonic = (1 << 29), 568 /// Set if the nonmonotonic schedule modifier was present. 
569 OMP_sch_modifier_nonmonotonic = (1 << 30), 570 }; 571 572 enum OpenMPRTLFunction { 573 /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 574 /// kmpc_micro microtask, ...); 575 OMPRTL__kmpc_fork_call, 576 /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, 577 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 578 OMPRTL__kmpc_threadprivate_cached, 579 /// Call to void __kmpc_threadprivate_register( ident_t *, 580 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 581 OMPRTL__kmpc_threadprivate_register, 582 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 583 OMPRTL__kmpc_global_thread_num, 584 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 585 // kmp_critical_name *crit); 586 OMPRTL__kmpc_critical, 587 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 588 // global_tid, kmp_critical_name *crit, uintptr_t hint); 589 OMPRTL__kmpc_critical_with_hint, 590 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 591 // kmp_critical_name *crit); 592 OMPRTL__kmpc_end_critical, 593 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 594 // global_tid); 595 OMPRTL__kmpc_cancel_barrier, 596 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 597 OMPRTL__kmpc_barrier, 598 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 599 OMPRTL__kmpc_for_static_fini, 600 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 601 // global_tid); 602 OMPRTL__kmpc_serialized_parallel, 603 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 604 // global_tid); 605 OMPRTL__kmpc_end_serialized_parallel, 606 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 607 // kmp_int32 num_threads); 608 OMPRTL__kmpc_push_num_threads, 609 // Call to void __kmpc_flush(ident_t *loc); 610 OMPRTL__kmpc_flush, 611 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 
global_tid); 612 OMPRTL__kmpc_master, 613 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 614 OMPRTL__kmpc_end_master, 615 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 616 // int end_part); 617 OMPRTL__kmpc_omp_taskyield, 618 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 619 OMPRTL__kmpc_single, 620 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 621 OMPRTL__kmpc_end_single, 622 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 623 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 624 // kmp_routine_entry_t *task_entry); 625 OMPRTL__kmpc_omp_task_alloc, 626 // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *, 627 // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, 628 // size_t sizeof_shareds, kmp_routine_entry_t *task_entry, 629 // kmp_int64 device_id); 630 OMPRTL__kmpc_omp_target_task_alloc, 631 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 632 // new_task); 633 OMPRTL__kmpc_omp_task, 634 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 635 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 636 // kmp_int32 didit); 637 OMPRTL__kmpc_copyprivate, 638 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 639 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 640 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 641 OMPRTL__kmpc_reduce, 642 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 643 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 644 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 645 // *lck); 646 OMPRTL__kmpc_reduce_nowait, 647 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 648 // kmp_critical_name *lck); 649 OMPRTL__kmpc_end_reduce, 650 // Call to void 
__kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 651 // kmp_critical_name *lck); 652 OMPRTL__kmpc_end_reduce_nowait, 653 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 654 // kmp_task_t * new_task); 655 OMPRTL__kmpc_omp_task_begin_if0, 656 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 657 // kmp_task_t * new_task); 658 OMPRTL__kmpc_omp_task_complete_if0, 659 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 660 OMPRTL__kmpc_ordered, 661 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 662 OMPRTL__kmpc_end_ordered, 663 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 664 // global_tid); 665 OMPRTL__kmpc_omp_taskwait, 666 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 667 OMPRTL__kmpc_taskgroup, 668 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 669 OMPRTL__kmpc_end_taskgroup, 670 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 671 // int proc_bind); 672 OMPRTL__kmpc_push_proc_bind, 673 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 674 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 675 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 676 OMPRTL__kmpc_omp_task_with_deps, 677 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 678 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 679 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 680 OMPRTL__kmpc_omp_wait_deps, 681 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 682 // global_tid, kmp_int32 cncl_kind); 683 OMPRTL__kmpc_cancellationpoint, 684 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 685 // kmp_int32 cncl_kind); 686 OMPRTL__kmpc_cancel, 687 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 688 // kmp_int32 num_teams, kmp_int32 thread_limit); 689 
OMPRTL__kmpc_push_num_teams, 690 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 691 // microtask, ...); 692 OMPRTL__kmpc_fork_teams, 693 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 694 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 695 // sched, kmp_uint64 grainsize, void *task_dup); 696 OMPRTL__kmpc_taskloop, 697 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 698 // num_dims, struct kmp_dim *dims); 699 OMPRTL__kmpc_doacross_init, 700 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 701 OMPRTL__kmpc_doacross_fini, 702 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 703 // *vec); 704 OMPRTL__kmpc_doacross_post, 705 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 706 // *vec); 707 OMPRTL__kmpc_doacross_wait, 708 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 709 // *data); 710 OMPRTL__kmpc_task_reduction_init, 711 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 712 // *d); 713 OMPRTL__kmpc_task_reduction_get_th_data, 714 // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); 715 OMPRTL__kmpc_alloc, 716 // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); 717 OMPRTL__kmpc_free, 718 719 // 720 // Offloading related calls 721 // 722 // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 723 // size); 724 OMPRTL__kmpc_push_target_tripcount, 725 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 726 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 727 // *arg_types); 728 OMPRTL__tgt_target, 729 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 730 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 731 // *arg_types); 732 OMPRTL__tgt_target_nowait, 733 // Call to 
int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 734 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 735 // *arg_types, int32_t num_teams, int32_t thread_limit); 736 OMPRTL__tgt_target_teams, 737 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void 738 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 739 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 740 OMPRTL__tgt_target_teams_nowait, 741 // Call to void __tgt_register_requires(int64_t flags); 742 OMPRTL__tgt_register_requires, 743 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 744 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 745 OMPRTL__tgt_target_data_begin, 746 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 747 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 748 // *arg_types); 749 OMPRTL__tgt_target_data_begin_nowait, 750 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 751 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 752 OMPRTL__tgt_target_data_end, 753 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 754 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 755 // *arg_types); 756 OMPRTL__tgt_target_data_end_nowait, 757 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 758 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 759 OMPRTL__tgt_target_data_update, 760 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 761 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 762 // *arg_types); 763 OMPRTL__tgt_target_data_update_nowait, 764 // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 765 OMPRTL__tgt_mapper_num_components, 766 // Call to void __tgt_push_mapper_component(void 
*rt_mapper_handle, void 767 // *base, void *begin, int64_t size, int64_t type); 768 OMPRTL__tgt_push_mapper_component, 769 // Call to kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 770 // int gtid, kmp_task_t *task); 771 OMPRTL__kmpc_task_allow_completion_event, 772 }; 773 774 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 775 /// region. 776 class CleanupTy final : public EHScopeStack::Cleanup { 777 PrePostActionTy *Action; 778 779 public: 780 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 781 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 782 if (!CGF.HaveInsertPoint()) 783 return; 784 Action->Exit(CGF); 785 } 786 }; 787 788 } // anonymous namespace 789 790 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 791 CodeGenFunction::RunCleanupsScope Scope(CGF); 792 if (PrePostAction) { 793 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 794 Callback(CodeGen, CGF, *PrePostAction); 795 } else { 796 PrePostActionTy Action; 797 Callback(CodeGen, CGF, Action); 798 } 799 } 800 801 /// Check if the combiner is a call to UDR combiner and if it is so return the 802 /// UDR decl used for reduction. 
803 static const OMPDeclareReductionDecl * 804 getReductionInit(const Expr *ReductionOp) { 805 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 806 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 807 if (const auto *DRE = 808 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 809 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 810 return DRD; 811 return nullptr; 812 } 813 814 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 815 const OMPDeclareReductionDecl *DRD, 816 const Expr *InitOp, 817 Address Private, Address Original, 818 QualType Ty) { 819 if (DRD->getInitializer()) { 820 std::pair<llvm::Function *, llvm::Function *> Reduction = 821 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 822 const auto *CE = cast<CallExpr>(InitOp); 823 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 824 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 825 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 826 const auto *LHSDRE = 827 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 828 const auto *RHSDRE = 829 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 830 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 831 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 832 [=]() { return Private; }); 833 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 834 [=]() { return Original; }); 835 (void)PrivateScope.Privatize(); 836 RValue Func = RValue::get(Reduction.second); 837 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 838 CGF.EmitIgnoredExpr(InitOp); 839 } else { 840 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 841 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 842 auto *GV = new llvm::GlobalVariable( 843 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 844 llvm::GlobalValue::PrivateLinkage, Init, Name); 845 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 846 RValue InitRVal; 
847 switch (CGF.getEvaluationKind(Ty)) { 848 case TEK_Scalar: 849 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 850 break; 851 case TEK_Complex: 852 InitRVal = 853 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 854 break; 855 case TEK_Aggregate: 856 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 857 break; 858 } 859 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 860 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 861 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 862 /*IsInitializer=*/false); 863 } 864 } 865 866 /// Emit initialization of arrays of complex types. 867 /// \param DestAddr Address of the array. 868 /// \param Type Type of array. 869 /// \param Init Initial expression of array. 870 /// \param SrcAddr Address of the original array. 871 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 872 QualType Type, bool EmitDeclareReductionInit, 873 const Expr *Init, 874 const OMPDeclareReductionDecl *DRD, 875 Address SrcAddr = Address::invalid()) { 876 // Perform element-by-element initialization. 877 QualType ElementTy; 878 879 // Drill down to the base element type on both arrays. 880 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 881 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 882 DestAddr = 883 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 884 if (DRD) 885 SrcAddr = 886 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 887 888 llvm::Value *SrcBegin = nullptr; 889 if (DRD) 890 SrcBegin = SrcAddr.getPointer(); 891 llvm::Value *DestBegin = DestAddr.getPointer(); 892 // Cast from pointer to array type to pointer to single element. 893 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 894 // The basic structure here is a while-do loop. 
895 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 896 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 897 llvm::Value *IsEmpty = 898 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 899 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 900 901 // Enter the loop body, making that address the current address. 902 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 903 CGF.EmitBlock(BodyBB); 904 905 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 906 907 llvm::PHINode *SrcElementPHI = nullptr; 908 Address SrcElementCurrent = Address::invalid(); 909 if (DRD) { 910 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 911 "omp.arraycpy.srcElementPast"); 912 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 913 SrcElementCurrent = 914 Address(SrcElementPHI, 915 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 916 } 917 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 918 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 919 DestElementPHI->addIncoming(DestBegin, EntryBB); 920 Address DestElementCurrent = 921 Address(DestElementPHI, 922 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 923 924 // Emit copy. 925 { 926 CodeGenFunction::RunCleanupsScope InitScope(CGF); 927 if (EmitDeclareReductionInit) { 928 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 929 SrcElementCurrent, ElementTy); 930 } else 931 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 932 /*IsInitializer=*/false); 933 } 934 935 if (DRD) { 936 // Shift the address forward by one element. 937 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 938 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 939 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 940 } 941 942 // Shift the address forward by one element. 
943 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 944 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 945 // Check whether we've reached the end. 946 llvm::Value *Done = 947 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 948 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 949 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 950 951 // Done. 952 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 953 } 954 955 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 956 return CGF.EmitOMPSharedLValue(E); 957 } 958 959 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 960 const Expr *E) { 961 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 962 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 963 return LValue(); 964 } 965 966 void ReductionCodeGen::emitAggregateInitialization( 967 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 968 const OMPDeclareReductionDecl *DRD) { 969 // Emit VarDecl with copy init for arrays. 970 // Get the address of the original variable captured in current 971 // captured region. 972 const auto *PrivateVD = 973 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 974 bool EmitDeclareReductionInit = 975 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 976 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 977 EmitDeclareReductionInit, 978 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 979 : PrivateVD->getInit(), 980 DRD, SharedLVal.getAddress(CGF)); 981 } 982 983 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 984 ArrayRef<const Expr *> Privates, 985 ArrayRef<const Expr *> ReductionOps) { 986 ClausesData.reserve(Shareds.size()); 987 SharedAddresses.reserve(Shareds.size()); 988 Sizes.reserve(Shareds.size()); 989 BaseDecls.reserve(Shareds.size()); 990 auto IPriv = Privates.begin(); 991 auto IRed = ReductionOps.begin(); 992 for (const Expr *Ref : Shareds) { 993 ClausesData.emplace_back(Ref, *IPriv, *IRed); 994 std::advance(IPriv, 1); 995 std::advance(IRed, 1); 996 } 997 } 998 999 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 1000 assert(SharedAddresses.size() == N && 1001 "Number of generated lvalues must be exactly N."); 1002 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 1003 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 1004 SharedAddresses.emplace_back(First, Second); 1005 } 1006 1007 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 1008 const auto *PrivateVD = 1009 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1010 QualType PrivateType = PrivateVD->getType(); 1011 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1012 if (!PrivateType->isVariablyModifiedType()) { 1013 Sizes.emplace_back( 1014 CGF.getTypeSize( 1015 SharedAddresses[N].first.getType().getNonReferenceType()), 1016 nullptr); 1017 return; 1018 } 1019 llvm::Value *Size; 1020 llvm::Value *SizeInChars; 1021 auto *ElemType = cast<llvm::PointerType>( 1022 SharedAddresses[N].first.getPointer(CGF)->getType()) 1023 ->getElementType(); 1024 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1025 if (AsArraySection) { 1026 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), 1027 SharedAddresses[N].first.getPointer(CGF)); 1028 Size = CGF.Builder.CreateNUWAdd( 1029 Size, 
llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 1030 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1031 } else { 1032 SizeInChars = CGF.getTypeSize( 1033 SharedAddresses[N].first.getType().getNonReferenceType()); 1034 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1035 } 1036 Sizes.emplace_back(SizeInChars, Size); 1037 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1038 CGF, 1039 cast<OpaqueValueExpr>( 1040 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1041 RValue::get(Size)); 1042 CGF.EmitVariablyModifiedType(PrivateType); 1043 } 1044 1045 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1046 llvm::Value *Size) { 1047 const auto *PrivateVD = 1048 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1049 QualType PrivateType = PrivateVD->getType(); 1050 if (!PrivateType->isVariablyModifiedType()) { 1051 assert(!Size && !Sizes[N].second && 1052 "Size should be nullptr for non-variably modified reduction " 1053 "items."); 1054 return; 1055 } 1056 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1057 CGF, 1058 cast<OpaqueValueExpr>( 1059 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1060 RValue::get(Size)); 1061 CGF.EmitVariablyModifiedType(PrivateType); 1062 } 1063 1064 void ReductionCodeGen::emitInitialization( 1065 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1066 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1067 assert(SharedAddresses.size() > N && "No variable was generated"); 1068 const auto *PrivateVD = 1069 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1070 const OMPDeclareReductionDecl *DRD = 1071 getReductionInit(ClausesData[N].ReductionOp); 1072 QualType PrivateType = PrivateVD->getType(); 1073 PrivateAddr = CGF.Builder.CreateElementBitCast( 1074 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1075 QualType SharedType = SharedAddresses[N].first.getType(); 1076 SharedLVal = 
CGF.MakeAddrLValue( 1077 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 1078 CGF.ConvertTypeForMem(SharedType)), 1079 SharedType, SharedAddresses[N].first.getBaseInfo(), 1080 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1081 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1082 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1083 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1084 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1085 PrivateAddr, SharedLVal.getAddress(CGF), 1086 SharedLVal.getType()); 1087 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1088 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1089 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1090 PrivateVD->getType().getQualifiers(), 1091 /*IsInitializer=*/false); 1092 } 1093 } 1094 1095 bool ReductionCodeGen::needCleanups(unsigned N) { 1096 const auto *PrivateVD = 1097 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1098 QualType PrivateType = PrivateVD->getType(); 1099 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1100 return DTorKind != QualType::DK_none; 1101 } 1102 1103 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1104 Address PrivateAddr) { 1105 const auto *PrivateVD = 1106 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1107 QualType PrivateType = PrivateVD->getType(); 1108 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1109 if (needCleanups(N)) { 1110 PrivateAddr = CGF.Builder.CreateElementBitCast( 1111 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1112 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1113 } 1114 } 1115 1116 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1117 LValue BaseLV) { 1118 BaseTy = BaseTy.getNonReferenceType(); 1119 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 
1120 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1121 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1122 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 1123 } else { 1124 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 1125 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1126 } 1127 BaseTy = BaseTy->getPointeeType(); 1128 } 1129 return CGF.MakeAddrLValue( 1130 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 1131 CGF.ConvertTypeForMem(ElTy)), 1132 BaseLV.getType(), BaseLV.getBaseInfo(), 1133 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1134 } 1135 1136 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1137 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1138 llvm::Value *Addr) { 1139 Address Tmp = Address::invalid(); 1140 Address TopTmp = Address::invalid(); 1141 Address MostTopTmp = Address::invalid(); 1142 BaseTy = BaseTy.getNonReferenceType(); 1143 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1144 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1145 Tmp = CGF.CreateMemTemp(BaseTy); 1146 if (TopTmp.isValid()) 1147 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1148 else 1149 MostTopTmp = Tmp; 1150 TopTmp = Tmp; 1151 BaseTy = BaseTy->getPointeeType(); 1152 } 1153 llvm::Type *Ty = BaseLVType; 1154 if (Tmp.isValid()) 1155 Ty = Tmp.getElementType(); 1156 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1157 if (Tmp.isValid()) { 1158 CGF.Builder.CreateStore(Addr, Tmp); 1159 return MostTopTmp; 1160 } 1161 return Address(Addr, BaseLVAlignment); 1162 } 1163 1164 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1165 const VarDecl *OrigVD = nullptr; 1166 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1167 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1168 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1169 Base = 
TempOASE->getBase()->IgnoreParenImpCasts(); 1170 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1171 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1172 DE = cast<DeclRefExpr>(Base); 1173 OrigVD = cast<VarDecl>(DE->getDecl()); 1174 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1175 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1176 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1177 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1178 DE = cast<DeclRefExpr>(Base); 1179 OrigVD = cast<VarDecl>(DE->getDecl()); 1180 } 1181 return OrigVD; 1182 } 1183 1184 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1185 Address PrivateAddr) { 1186 const DeclRefExpr *DE; 1187 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1188 BaseDecls.emplace_back(OrigVD); 1189 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1190 LValue BaseLValue = 1191 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1192 OriginalBaseLValue); 1193 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1194 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1195 llvm::Value *PrivatePointer = 1196 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1197 PrivateAddr.getPointer(), 1198 SharedAddresses[N].first.getAddress(CGF).getType()); 1199 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1200 return castToBase(CGF, OrigVD->getType(), 1201 SharedAddresses[N].first.getType(), 1202 OriginalBaseLValue.getAddress(CGF).getType(), 1203 OriginalBaseLValue.getAlignment(), Ptr); 1204 } 1205 BaseDecls.emplace_back( 1206 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1207 return PrivateAddr; 1208 } 1209 1210 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1211 const OMPDeclareReductionDecl *DRD = 1212 getReductionInit(ClausesData[N].ReductionOp); 1213 return DRD && DRD->getInitializer(); 1214 } 1215 1216 
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1217 return CGF.EmitLoadOfPointerLValue( 1218 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1219 getThreadIDVariable()->getType()->castAs<PointerType>()); 1220 } 1221 1222 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1223 if (!CGF.HaveInsertPoint()) 1224 return; 1225 // 1.2.2 OpenMP Language Terminology 1226 // Structured block - An executable statement with a single entry at the 1227 // top and a single exit at the bottom. 1228 // The point of exit cannot be a branch out of the structured block. 1229 // longjmp() and throw() must not violate the entry/exit criteria. 1230 CGF.EHStack.pushTerminate(); 1231 CodeGen(CGF); 1232 CGF.EHStack.popTerminate(); 1233 } 1234 1235 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1236 CodeGenFunction &CGF) { 1237 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1238 getThreadIDVariable()->getType(), 1239 AlignmentSource::Decl); 1240 } 1241 1242 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1243 QualType FieldTy) { 1244 auto *Field = FieldDecl::Create( 1245 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1246 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1247 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1248 Field->setAccess(AS_public); 1249 DC->addDecl(Field); 1250 return Field; 1251 } 1252 1253 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1254 StringRef Separator) 1255 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1256 OffloadEntriesInfoManager(CGM) { 1257 ASTContext &C = CGM.getContext(); 1258 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1259 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1260 RD->startDefinition(); 1261 // reserved_1 1262 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1263 // flags 1264 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 1265 // reserved_2 1266 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1267 // reserved_3 1268 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1269 // psource 1270 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1271 RD->completeDefinition(); 1272 IdentQTy = C.getRecordType(RD); 1273 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1274 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1275 1276 loadOffloadInfoMetadata(); 1277 } 1278 1279 void CGOpenMPRuntime::clear() { 1280 InternalVars.clear(); 1281 // Clean non-target variable declarations possibly used only in debug info. 1282 for (const auto &Data : EmittedNonTargetVariables) { 1283 if (!Data.getValue().pointsToAliveValue()) 1284 continue; 1285 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1286 if (!GV) 1287 continue; 1288 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1289 continue; 1290 GV->eraseFromParent(); 1291 } 1292 } 1293 1294 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1295 SmallString<128> Buffer; 1296 llvm::raw_svector_ostream OS(Buffer); 1297 StringRef Sep = FirstSeparator; 1298 for (StringRef Part : Parts) { 1299 OS << Sep << Part; 1300 Sep = Separator; 1301 } 1302 return std::string(OS.str()); 1303 } 1304 1305 static llvm::Function * 1306 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1307 const Expr *CombinerInitializer, const VarDecl *In, 1308 const VarDecl *Out, bool IsCombiner) { 1309 // void .omp_combiner.(Ty *in, Ty *out); 1310 ASTContext &C = CGM.getContext(); 1311 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1312 FunctionArgList Args; 1313 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1314 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1315 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1316 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1317 Args.push_back(&OmpOutParm); 1318 Args.push_back(&OmpInParm); 1319 const CGFunctionInfo 
&FnInfo = 1320 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1321 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1322 std::string Name = CGM.getOpenMPRuntime().getName( 1323 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1324 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1325 Name, &CGM.getModule()); 1326 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1327 if (CGM.getLangOpts().Optimize) { 1328 Fn->removeFnAttr(llvm::Attribute::NoInline); 1329 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1330 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1331 } 1332 CodeGenFunction CGF(CGM); 1333 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1334 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1335 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1336 Out->getLocation()); 1337 CodeGenFunction::OMPPrivateScope Scope(CGF); 1338 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1339 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1340 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1341 .getAddress(CGF); 1342 }); 1343 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1344 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1345 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1346 .getAddress(CGF); 1347 }); 1348 (void)Scope.Privatize(); 1349 if (!IsCombiner && Out->hasInit() && 1350 !CGF.isTrivialInitializer(Out->getInit())) { 1351 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1352 Out->getType().getQualifiers(), 1353 /*IsInitializer=*/true); 1354 } 1355 if (CombinerInitializer) 1356 CGF.EmitIgnoredExpr(CombinerInitializer); 1357 Scope.ForceCleanup(); 1358 CGF.FinishFunction(); 1359 return Fn; 1360 } 1361 1362 void CGOpenMPRuntime::emitUserDefinedReduction( 1363 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1364 if 
(UDRMap.count(D) > 0) 1365 return; 1366 llvm::Function *Combiner = emitCombinerOrInitializer( 1367 CGM, D->getType(), D->getCombiner(), 1368 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1369 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1370 /*IsCombiner=*/true); 1371 llvm::Function *Initializer = nullptr; 1372 if (const Expr *Init = D->getInitializer()) { 1373 Initializer = emitCombinerOrInitializer( 1374 CGM, D->getType(), 1375 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1376 : nullptr, 1377 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1378 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1379 /*IsCombiner=*/false); 1380 } 1381 UDRMap.try_emplace(D, Combiner, Initializer); 1382 if (CGF) { 1383 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1384 Decls.second.push_back(D); 1385 } 1386 } 1387 1388 std::pair<llvm::Function *, llvm::Function *> 1389 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1390 auto I = UDRMap.find(D); 1391 if (I != UDRMap.end()) 1392 return I->second; 1393 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1394 return UDRMap.lookup(D); 1395 } 1396 1397 namespace { 1398 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1399 // Builder if one is present. 1400 struct PushAndPopStackRAII { 1401 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1402 bool HasCancel) 1403 : OMPBuilder(OMPBuilder) { 1404 if (!OMPBuilder) 1405 return; 1406 1407 // The following callback is the crucial part of clangs cleanup process. 1408 // 1409 // NOTE: 1410 // Once the OpenMPIRBuilder is used to create parallel regions (and 1411 // similar), the cancellation destination (Dest below) is determined via 1412 // IP. 
That means if we have variables to finalize we split the block at IP, 1413 // use the new block (=BB) as destination to build a JumpDest (via 1414 // getJumpDestInCurrentScope(BB)) which then is fed to 1415 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1416 // to push & pop an FinalizationInfo object. 1417 // The FiniCB will still be needed but at the point where the 1418 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1419 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1420 assert(IP.getBlock()->end() == IP.getPoint() && 1421 "Clang CG should cause non-terminated block!"); 1422 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1423 CGF.Builder.restoreIP(IP); 1424 CodeGenFunction::JumpDest Dest = 1425 CGF.getOMPCancelDestination(OMPD_parallel); 1426 CGF.EmitBranchThroughCleanup(Dest); 1427 }; 1428 1429 // TODO: Remove this once we emit parallel regions through the 1430 // OpenMPIRBuilder as it can do this setup internally. 
1431 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1432 {FiniCB, OMPD_parallel, HasCancel}); 1433 OMPBuilder->pushFinalizationCB(std::move(FI)); 1434 } 1435 ~PushAndPopStackRAII() { 1436 if (OMPBuilder) 1437 OMPBuilder->popFinalizationCB(); 1438 } 1439 llvm::OpenMPIRBuilder *OMPBuilder; 1440 }; 1441 } // namespace 1442 1443 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1444 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1445 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1446 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1447 assert(ThreadIDVar->getType()->isPointerType() && 1448 "thread id variable must be of type kmp_int32 *"); 1449 CodeGenFunction CGF(CGM, true); 1450 bool HasCancel = false; 1451 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1452 HasCancel = OPD->hasCancel(); 1453 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1454 HasCancel = OPSD->hasCancel(); 1455 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1456 HasCancel = OPFD->hasCancel(); 1457 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1458 HasCancel = OPFD->hasCancel(); 1459 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1460 HasCancel = OPFD->hasCancel(); 1461 else if (const auto *OPFD = 1462 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1463 HasCancel = OPFD->hasCancel(); 1464 else if (const auto *OPFD = 1465 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1466 HasCancel = OPFD->hasCancel(); 1467 1468 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1469 // parallel region to make cancellation barriers work properly. 
1470 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1471 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1472 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1473 HasCancel, OutlinedHelperName); 1474 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1475 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1476 } 1477 1478 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1479 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1480 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1481 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1482 return emitParallelOrTeamsOutlinedFunction( 1483 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1484 } 1485 1486 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1487 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1488 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1489 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1490 return emitParallelOrTeamsOutlinedFunction( 1491 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1492 } 1493 1494 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1495 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1496 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1497 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1498 bool Tied, unsigned &NumberOfParts) { 1499 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1500 PrePostActionTy &) { 1501 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1502 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1503 llvm::Value *TaskArgs[] = { 1504 UpLoc, ThreadID, 1505 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1506 TaskTVar->getType()->castAs<PointerType>()) 1507 .getPointer(CGF)}; 1508 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1509 }; 1510 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1511 UntiedCodeGen); 1512 CodeGen.setAction(Action); 1513 assert(!ThreadIDVar->getType()->isPointerType() && 1514 "thread id variable must be of type kmp_int32 for tasks"); 1515 const OpenMPDirectiveKind Region = 1516 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1517 : OMPD_task; 1518 const CapturedStmt *CS = D.getCapturedStmt(Region); 1519 bool HasCancel = false; 1520 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1521 HasCancel = TD->hasCancel(); 1522 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1523 HasCancel = TD->hasCancel(); 1524 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1525 HasCancel = TD->hasCancel(); 1526 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1527 HasCancel = TD->hasCancel(); 1528 1529 CodeGenFunction CGF(CGM, true); 1530 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1531 InnermostKind, HasCancel, Action); 1532 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1533 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1534 if (!Tied) 1535 NumberOfParts = Action.getNumberOfParts(); 1536 return Res; 1537 } 1538 1539 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1540 const RecordDecl *RD, const CGRecordLayout &RL, 1541 ArrayRef<llvm::Constant *> Data) { 1542 llvm::StructType *StructTy = RL.getLLVMType(); 1543 unsigned PrevIdx = 0; 1544 ConstantInitBuilder CIBuilder(CGM); 1545 auto DI = Data.begin(); 1546 for (const FieldDecl *FD : RD->fields()) { 1547 unsigned Idx = RL.getLLVMFieldNo(FD); 1548 // Fill the alignment. 
1549 for (unsigned I = PrevIdx; I < Idx; ++I) 1550 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1551 PrevIdx = Idx + 1; 1552 Fields.add(*DI); 1553 ++DI; 1554 } 1555 } 1556 1557 template <class... As> 1558 static llvm::GlobalVariable * 1559 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1560 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1561 As &&... Args) { 1562 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1563 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1564 ConstantInitBuilder CIBuilder(CGM); 1565 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1566 buildStructValue(Fields, CGM, RD, RL, Data); 1567 return Fields.finishAndCreateGlobal( 1568 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1569 std::forward<As>(Args)...); 1570 } 1571 1572 template <typename T> 1573 static void 1574 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1575 ArrayRef<llvm::Constant *> Data, 1576 T &Parent) { 1577 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1578 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1579 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1580 buildStructValue(Fields, CGM, RD, RL, Data); 1581 Fields.finishAndAddTo(Parent); 1582 } 1583 1584 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1585 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1586 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1587 FlagsTy FlagsKey(Flags, Reserved2Flags); 1588 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1589 if (!Entry) { 1590 if (!DefaultOpenMPPSource) { 1591 // Initialize default location for psource field of ident_t structure of 1592 // all ident_t objects. Format is ";file;function;line;column;;". 
1593 // Taken from 1594 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1595 DefaultOpenMPPSource = 1596 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1597 DefaultOpenMPPSource = 1598 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1599 } 1600 1601 llvm::Constant *Data[] = { 1602 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1603 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1604 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1605 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1606 llvm::GlobalValue *DefaultOpenMPLocation = 1607 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1608 llvm::GlobalValue::PrivateLinkage); 1609 DefaultOpenMPLocation->setUnnamedAddr( 1610 llvm::GlobalValue::UnnamedAddr::Global); 1611 1612 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1613 } 1614 return Address(Entry, Align); 1615 } 1616 1617 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1618 bool AtCurrentPoint) { 1619 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1620 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1621 1622 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1623 if (AtCurrentPoint) { 1624 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1625 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1626 } else { 1627 Elem.second.ServiceInsertPt = 1628 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1629 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1630 } 1631 } 1632 1633 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1634 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1635 if (Elem.second.ServiceInsertPt) { 1636 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1637 Elem.second.ServiceInsertPt = nullptr; 1638 Ptr->eraseFromParent(); 1639 } 1640 } 1641 1642 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1643 SourceLocation Loc, 1644 unsigned Flags) { 1645 Flags |= OMP_IDENT_KMPC; 1646 // If no debug info is generated - return global default location. 1647 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1648 Loc.isInvalid()) 1649 return getOrCreateDefaultLocation(Flags).getPointer(); 1650 1651 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1652 1653 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1654 Address LocValue = Address::invalid(); 1655 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1656 if (I != OpenMPLocThreadIDMap.end()) 1657 LocValue = Address(I->second.DebugLoc, Align); 1658 1659 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1660 // GetOpenMPThreadID was called before this routine. 1661 if (!LocValue.isValid()) { 1662 // Generate "ident_t .kmpc_loc.addr;" 1663 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1664 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1665 Elem.second.DebugLoc = AI.getPointer(); 1666 LocValue = AI; 1667 1668 if (!Elem.second.ServiceInsertPt) 1669 setLocThreadIdInsertPt(CGF); 1670 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1671 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1672 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1673 CGF.getTypeSize(IdentQTy)); 1674 } 1675 1676 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1677 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1678 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1679 LValue PSource = 1680 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1681 1682 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1683 if (OMPDebugLoc == nullptr) { 1684 SmallString<128> Buffer2; 1685 llvm::raw_svector_ostream OS2(Buffer2); 1686 // Build debug location 1687 PresumedLoc PLoc = 
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1688 OS2 << ";" << PLoc.getFilename() << ";"; 1689 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1690 OS2 << FD->getQualifiedNameAsString(); 1691 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1692 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1693 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1694 } 1695 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1696 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1697 1698 // Our callers always pass this to a runtime function, so for 1699 // convenience, go ahead and return a naked pointer. 1700 return LocValue.getPointer(); 1701 } 1702 1703 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1704 SourceLocation Loc) { 1705 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1706 1707 llvm::Value *ThreadID = nullptr; 1708 // Check whether we've already cached a load of the thread id in this 1709 // function. 1710 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1711 if (I != OpenMPLocThreadIDMap.end()) { 1712 ThreadID = I->second.ThreadID; 1713 if (ThreadID != nullptr) 1714 return ThreadID; 1715 } 1716 // If exceptions are enabled, do not use parameter to avoid possible crash. 1717 if (auto *OMPRegionInfo = 1718 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1719 if (OMPRegionInfo->getThreadIDVariable()) { 1720 // Check if this an outlined function with thread id passed as argument. 
1721 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1722 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1723 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1724 !CGF.getLangOpts().CXXExceptions || 1725 CGF.Builder.GetInsertBlock() == TopBlock || 1726 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1727 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1728 TopBlock || 1729 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1730 CGF.Builder.GetInsertBlock()) { 1731 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1732 // If value loaded in entry block, cache it and use it everywhere in 1733 // function. 1734 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1735 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1736 Elem.second.ThreadID = ThreadID; 1737 } 1738 return ThreadID; 1739 } 1740 } 1741 } 1742 1743 // This is not an outlined function region - need to call __kmpc_int32 1744 // kmpc_global_thread_num(ident_t *loc). 1745 // Generate thread id value and cache this value for use across the 1746 // function. 
1747 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1748 if (!Elem.second.ServiceInsertPt) 1749 setLocThreadIdInsertPt(CGF); 1750 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1751 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1752 llvm::CallInst *Call = CGF.Builder.CreateCall( 1753 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1754 emitUpdateLocation(CGF, Loc)); 1755 Call->setCallingConv(CGF.getRuntimeCC()); 1756 Elem.second.ThreadID = Call; 1757 return Call; 1758 } 1759 1760 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1761 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1762 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1763 clearLocThreadIdInsertPt(CGF); 1764 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1765 } 1766 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1767 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1768 UDRMap.erase(D); 1769 FunctionUDRMap.erase(CGF.CurFn); 1770 } 1771 auto I = FunctionUDMMap.find(CGF.CurFn); 1772 if (I != FunctionUDMMap.end()) { 1773 for(const auto *D : I->second) 1774 UDMMap.erase(D); 1775 FunctionUDMMap.erase(I); 1776 } 1777 LastprivateConditionalToTypes.erase(CGF.CurFn); 1778 } 1779 1780 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1781 return IdentTy->getPointerTo(); 1782 } 1783 1784 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1785 if (!Kmpc_MicroTy) { 1786 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1787 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1788 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1789 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1790 } 1791 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1792 } 1793 1794 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1795 llvm::FunctionCallee RTLFn = nullptr; 1796 switch (static_cast<OpenMPRTLFunction>(Function)) { 1797 case OMPRTL__kmpc_fork_call: { 1798 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1799 // microtask, ...); 1800 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1801 getKmpc_MicroPointerTy()}; 1802 auto *FnTy = 1803 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1804 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1805 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1806 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1807 llvm::LLVMContext &Ctx = F->getContext(); 1808 llvm::MDBuilder MDB(Ctx); 1809 // Annotate the callback behavior of the __kmpc_fork_call: 1810 // - The callback callee is argument number 2 (microtask). 1811 // - The first two arguments of the callback callee are unknown (-1). 1812 // - All variadic arguments to the __kmpc_fork_call are passed to the 1813 // callback callee. 
1814 F->addMetadata( 1815 llvm::LLVMContext::MD_callback, 1816 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1817 2, {-1, -1}, 1818 /* VarArgsArePassed */ true)})); 1819 } 1820 } 1821 break; 1822 } 1823 case OMPRTL__kmpc_global_thread_num: { 1824 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1825 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1826 auto *FnTy = 1827 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1828 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1829 break; 1830 } 1831 case OMPRTL__kmpc_threadprivate_cached: { 1832 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1833 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1834 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1835 CGM.VoidPtrTy, CGM.SizeTy, 1836 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1837 auto *FnTy = 1838 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1839 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1840 break; 1841 } 1842 case OMPRTL__kmpc_critical: { 1843 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1844 // kmp_critical_name *crit); 1845 llvm::Type *TypeParams[] = { 1846 getIdentTyPointerTy(), CGM.Int32Ty, 1847 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1848 auto *FnTy = 1849 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1850 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1851 break; 1852 } 1853 case OMPRTL__kmpc_critical_with_hint: { 1854 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1855 // kmp_critical_name *crit, uintptr_t hint); 1856 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1857 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1858 CGM.IntPtrTy}; 1859 auto *FnTy = 1860 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1861 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1862 break; 1863 } 1864 case OMPRTL__kmpc_threadprivate_register: { 1865 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1866 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1867 // typedef void *(*kmpc_ctor)(void *); 1868 auto *KmpcCtorTy = 1869 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1870 /*isVarArg*/ false)->getPointerTo(); 1871 // typedef void *(*kmpc_cctor)(void *, void *); 1872 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1873 auto *KmpcCopyCtorTy = 1874 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1875 /*isVarArg*/ false) 1876 ->getPointerTo(); 1877 // typedef void (*kmpc_dtor)(void *); 1878 auto *KmpcDtorTy = 1879 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1880 ->getPointerTo(); 1881 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1882 KmpcCopyCtorTy, KmpcDtorTy}; 1883 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1884 /*isVarArg*/ false); 1885 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1886 break; 1887 } 1888 case OMPRTL__kmpc_end_critical: { 1889 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1890 // kmp_critical_name *crit); 1891 llvm::Type *TypeParams[] = { 1892 getIdentTyPointerTy(), CGM.Int32Ty, 1893 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1894 auto *FnTy = 1895 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1896 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1897 break; 1898 } 1899 case OMPRTL__kmpc_cancel_barrier: { 1900 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1901 // global_tid); 1902 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1903 auto *FnTy = 1904 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1905 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1906 break; 1907 } 1908 case 
OMPRTL__kmpc_barrier: { 1909 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1910 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1911 auto *FnTy = 1912 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1913 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1914 break; 1915 } 1916 case OMPRTL__kmpc_for_static_fini: { 1917 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1918 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1919 auto *FnTy = 1920 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1921 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1922 break; 1923 } 1924 case OMPRTL__kmpc_push_num_threads: { 1925 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1926 // kmp_int32 num_threads) 1927 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1928 CGM.Int32Ty}; 1929 auto *FnTy = 1930 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1931 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1932 break; 1933 } 1934 case OMPRTL__kmpc_serialized_parallel: { 1935 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1936 // global_tid); 1937 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1938 auto *FnTy = 1939 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1940 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1941 break; 1942 } 1943 case OMPRTL__kmpc_end_serialized_parallel: { 1944 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1945 // global_tid); 1946 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1947 auto *FnTy = 1948 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1949 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1950 break; 1951 } 1952 case OMPRTL__kmpc_flush: { 1953 // Build void 
__kmpc_flush(ident_t *loc); 1954 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1955 auto *FnTy = 1956 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1957 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1958 break; 1959 } 1960 case OMPRTL__kmpc_master: { 1961 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1962 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1963 auto *FnTy = 1964 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1965 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1966 break; 1967 } 1968 case OMPRTL__kmpc_end_master: { 1969 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1970 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1971 auto *FnTy = 1972 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1973 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1974 break; 1975 } 1976 case OMPRTL__kmpc_omp_taskyield: { 1977 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1978 // int end_part); 1979 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1980 auto *FnTy = 1981 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1982 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1983 break; 1984 } 1985 case OMPRTL__kmpc_single: { 1986 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1987 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1988 auto *FnTy = 1989 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1990 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1991 break; 1992 } 1993 case OMPRTL__kmpc_end_single: { 1994 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1995 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1996 auto *FnTy = 1997 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1998 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1999 break; 2000 } 2001 case OMPRTL__kmpc_omp_task_alloc: { 2002 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2003 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2004 // kmp_routine_entry_t *task_entry); 2005 assert(KmpRoutineEntryPtrTy != nullptr && 2006 "Type kmp_routine_entry_t must be created."); 2007 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2008 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2009 // Return void * and then cast to particular kmp_task_t type. 2010 auto *FnTy = 2011 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2012 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2013 break; 2014 } 2015 case OMPRTL__kmpc_omp_target_task_alloc: { 2016 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2017 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2018 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2019 assert(KmpRoutineEntryPtrTy != nullptr && 2020 "Type kmp_routine_entry_t must be created."); 2021 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2022 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2023 CGM.Int64Ty}; 2024 // Return void * and then cast to particular kmp_task_t type. 
2025 auto *FnTy = 2026 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2027 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2028 break; 2029 } 2030 case OMPRTL__kmpc_omp_task: { 2031 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2032 // *new_task); 2033 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2034 CGM.VoidPtrTy}; 2035 auto *FnTy = 2036 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2037 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2038 break; 2039 } 2040 case OMPRTL__kmpc_copyprivate: { 2041 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2042 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2043 // kmp_int32 didit); 2044 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2045 auto *CpyFnTy = 2046 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2047 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2048 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2049 CGM.Int32Ty}; 2050 auto *FnTy = 2051 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2052 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2053 break; 2054 } 2055 case OMPRTL__kmpc_reduce: { 2056 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2057 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2058 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2059 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2060 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2061 /*isVarArg=*/false); 2062 llvm::Type *TypeParams[] = { 2063 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2064 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2065 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2066 auto *FnTy = 2067 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2068 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2069 break; 2070 } 2071 case OMPRTL__kmpc_reduce_nowait: { 2072 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2073 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2074 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2075 // *lck); 2076 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2077 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2078 /*isVarArg=*/false); 2079 llvm::Type *TypeParams[] = { 2080 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2081 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2082 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2083 auto *FnTy = 2084 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2085 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2086 break; 2087 } 2088 case OMPRTL__kmpc_end_reduce: { 2089 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2090 // kmp_critical_name *lck); 2091 llvm::Type *TypeParams[] = { 2092 getIdentTyPointerTy(), CGM.Int32Ty, 2093 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2094 auto *FnTy = 2095 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2096 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2097 break; 2098 } 2099 case OMPRTL__kmpc_end_reduce_nowait: { 2100 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2101 // kmp_critical_name *lck); 2102 llvm::Type *TypeParams[] = { 2103 getIdentTyPointerTy(), CGM.Int32Ty, 2104 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2105 auto *FnTy = 2106 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2107 RTLFn = 2108 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2109 break; 2110 } 2111 case OMPRTL__kmpc_omp_task_begin_if0: { 
2112 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2113 // *new_task); 2114 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2115 CGM.VoidPtrTy}; 2116 auto *FnTy = 2117 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2118 RTLFn = 2119 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2120 break; 2121 } 2122 case OMPRTL__kmpc_omp_task_complete_if0: { 2123 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2124 // *new_task); 2125 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2126 CGM.VoidPtrTy}; 2127 auto *FnTy = 2128 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2129 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2130 /*Name=*/"__kmpc_omp_task_complete_if0"); 2131 break; 2132 } 2133 case OMPRTL__kmpc_ordered: { 2134 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2135 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2136 auto *FnTy = 2137 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2138 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2139 break; 2140 } 2141 case OMPRTL__kmpc_end_ordered: { 2142 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2143 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2144 auto *FnTy = 2145 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2146 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2147 break; 2148 } 2149 case OMPRTL__kmpc_omp_taskwait: { 2150 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2151 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2152 auto *FnTy = 2153 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2154 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2155 break; 2156 } 2157 case OMPRTL__kmpc_taskgroup: { 2158 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2159 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2160 auto *FnTy = 2161 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2162 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2163 break; 2164 } 2165 case OMPRTL__kmpc_end_taskgroup: { 2166 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2167 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2168 auto *FnTy = 2169 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2170 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2171 break; 2172 } 2173 case OMPRTL__kmpc_push_proc_bind: { 2174 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2175 // int proc_bind) 2176 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2177 auto *FnTy = 2178 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2179 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2180 break; 2181 } 2182 case OMPRTL__kmpc_omp_task_with_deps: { 2183 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2184 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2185 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2186 llvm::Type *TypeParams[] = { 2187 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2188 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2189 auto *FnTy = 2190 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2191 RTLFn = 2192 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2193 break; 2194 } 2195 case OMPRTL__kmpc_omp_wait_deps: { 2196 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2197 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2198 // kmp_depend_info_t *noalias_dep_list); 2199 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2200 CGM.Int32Ty, CGM.VoidPtrTy, 2201 
CGM.Int32Ty, CGM.VoidPtrTy}; 2202 auto *FnTy = 2203 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2204 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2205 break; 2206 } 2207 case OMPRTL__kmpc_cancellationpoint: { 2208 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2209 // global_tid, kmp_int32 cncl_kind) 2210 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2211 auto *FnTy = 2212 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2213 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2214 break; 2215 } 2216 case OMPRTL__kmpc_cancel: { 2217 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2218 // kmp_int32 cncl_kind) 2219 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2220 auto *FnTy = 2221 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2222 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2223 break; 2224 } 2225 case OMPRTL__kmpc_push_num_teams: { 2226 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2227 // kmp_int32 num_teams, kmp_int32 num_threads) 2228 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2229 CGM.Int32Ty}; 2230 auto *FnTy = 2231 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2232 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2233 break; 2234 } 2235 case OMPRTL__kmpc_fork_teams: { 2236 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2237 // microtask, ...); 2238 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2239 getKmpc_MicroPointerTy()}; 2240 auto *FnTy = 2241 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2242 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2243 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2244 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2245 llvm::LLVMContext &Ctx = F->getContext(); 2246 llvm::MDBuilder MDB(Ctx); 2247 // Annotate the callback behavior of the __kmpc_fork_teams: 2248 // - The callback callee is argument number 2 (microtask). 2249 // - The first two arguments of the callback callee are unknown (-1). 2250 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2251 // callback callee. 2252 F->addMetadata( 2253 llvm::LLVMContext::MD_callback, 2254 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2255 2, {-1, -1}, 2256 /* VarArgsArePassed */ true)})); 2257 } 2258 } 2259 break; 2260 } 2261 case OMPRTL__kmpc_taskloop: { 2262 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2263 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2264 // sched, kmp_uint64 grainsize, void *task_dup); 2265 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2266 CGM.IntTy, 2267 CGM.VoidPtrTy, 2268 CGM.IntTy, 2269 CGM.Int64Ty->getPointerTo(), 2270 CGM.Int64Ty->getPointerTo(), 2271 CGM.Int64Ty, 2272 CGM.IntTy, 2273 CGM.IntTy, 2274 CGM.Int64Ty, 2275 CGM.VoidPtrTy}; 2276 auto *FnTy = 2277 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2278 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2279 break; 2280 } 2281 case OMPRTL__kmpc_doacross_init: { 2282 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2283 // num_dims, struct kmp_dim *dims); 2284 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2285 CGM.Int32Ty, 2286 CGM.Int32Ty, 2287 CGM.VoidPtrTy}; 2288 auto *FnTy = 2289 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2290 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2291 break; 2292 } 2293 case OMPRTL__kmpc_doacross_fini: { 2294 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2295 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2296 auto *FnTy 
= 2297 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2298 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2299 break; 2300 } 2301 case OMPRTL__kmpc_doacross_post: { 2302 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2303 // *vec); 2304 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2305 CGM.Int64Ty->getPointerTo()}; 2306 auto *FnTy = 2307 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2308 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2309 break; 2310 } 2311 case OMPRTL__kmpc_doacross_wait: { 2312 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2313 // *vec); 2314 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2315 CGM.Int64Ty->getPointerTo()}; 2316 auto *FnTy = 2317 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2318 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2319 break; 2320 } 2321 case OMPRTL__kmpc_task_reduction_init: { 2322 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2323 // *data); 2324 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2325 auto *FnTy = 2326 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2327 RTLFn = 2328 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2329 break; 2330 } 2331 case OMPRTL__kmpc_task_reduction_get_th_data: { 2332 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2333 // *d); 2334 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2335 auto *FnTy = 2336 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2337 RTLFn = CGM.CreateRuntimeFunction( 2338 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2339 break; 2340 } 2341 case OMPRTL__kmpc_alloc: { 2342 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2343 // 
al); omp_allocator_handle_t type is void *. 2344 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2345 auto *FnTy = 2346 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2347 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2348 break; 2349 } 2350 case OMPRTL__kmpc_free: { 2351 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2352 // al); omp_allocator_handle_t type is void *. 2353 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2354 auto *FnTy = 2355 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2356 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2357 break; 2358 } 2359 case OMPRTL__kmpc_push_target_tripcount: { 2360 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2361 // size); 2362 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2363 llvm::FunctionType *FnTy = 2364 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2365 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2366 break; 2367 } 2368 case OMPRTL__tgt_target: { 2369 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2370 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2371 // *arg_types); 2372 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2373 CGM.VoidPtrTy, 2374 CGM.Int32Ty, 2375 CGM.VoidPtrPtrTy, 2376 CGM.VoidPtrPtrTy, 2377 CGM.Int64Ty->getPointerTo(), 2378 CGM.Int64Ty->getPointerTo()}; 2379 auto *FnTy = 2380 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2381 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2382 break; 2383 } 2384 case OMPRTL__tgt_target_nowait: { 2385 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2386 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2387 // int64_t *arg_types); 2388 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2389 CGM.VoidPtrTy, 2390 
CGM.Int32Ty, 2391 CGM.VoidPtrPtrTy, 2392 CGM.VoidPtrPtrTy, 2393 CGM.Int64Ty->getPointerTo(), 2394 CGM.Int64Ty->getPointerTo()}; 2395 auto *FnTy = 2396 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2397 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2398 break; 2399 } 2400 case OMPRTL__tgt_target_teams: { 2401 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2402 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2403 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2404 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2405 CGM.VoidPtrTy, 2406 CGM.Int32Ty, 2407 CGM.VoidPtrPtrTy, 2408 CGM.VoidPtrPtrTy, 2409 CGM.Int64Ty->getPointerTo(), 2410 CGM.Int64Ty->getPointerTo(), 2411 CGM.Int32Ty, 2412 CGM.Int32Ty}; 2413 auto *FnTy = 2414 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2415 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2416 break; 2417 } 2418 case OMPRTL__tgt_target_teams_nowait: { 2419 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2420 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2421 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2422 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2423 CGM.VoidPtrTy, 2424 CGM.Int32Ty, 2425 CGM.VoidPtrPtrTy, 2426 CGM.VoidPtrPtrTy, 2427 CGM.Int64Ty->getPointerTo(), 2428 CGM.Int64Ty->getPointerTo(), 2429 CGM.Int32Ty, 2430 CGM.Int32Ty}; 2431 auto *FnTy = 2432 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2433 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2434 break; 2435 } 2436 case OMPRTL__tgt_register_requires: { 2437 // Build void __tgt_register_requires(int64_t flags); 2438 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2439 auto *FnTy = 2440 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2441 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2442 
break; 2443 } 2444 case OMPRTL__tgt_target_data_begin: { 2445 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2446 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2447 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2448 CGM.Int32Ty, 2449 CGM.VoidPtrPtrTy, 2450 CGM.VoidPtrPtrTy, 2451 CGM.Int64Ty->getPointerTo(), 2452 CGM.Int64Ty->getPointerTo()}; 2453 auto *FnTy = 2454 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2455 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2456 break; 2457 } 2458 case OMPRTL__tgt_target_data_begin_nowait: { 2459 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2460 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2461 // *arg_types); 2462 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2463 CGM.Int32Ty, 2464 CGM.VoidPtrPtrTy, 2465 CGM.VoidPtrPtrTy, 2466 CGM.Int64Ty->getPointerTo(), 2467 CGM.Int64Ty->getPointerTo()}; 2468 auto *FnTy = 2469 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2470 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2471 break; 2472 } 2473 case OMPRTL__tgt_target_data_end: { 2474 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2475 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2476 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2477 CGM.Int32Ty, 2478 CGM.VoidPtrPtrTy, 2479 CGM.VoidPtrPtrTy, 2480 CGM.Int64Ty->getPointerTo(), 2481 CGM.Int64Ty->getPointerTo()}; 2482 auto *FnTy = 2483 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2484 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2485 break; 2486 } 2487 case OMPRTL__tgt_target_data_end_nowait: { 2488 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2489 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2490 // *arg_types); 2491 llvm::Type *TypeParams[] = {CGM.Int64Ty, 
2492 CGM.Int32Ty, 2493 CGM.VoidPtrPtrTy, 2494 CGM.VoidPtrPtrTy, 2495 CGM.Int64Ty->getPointerTo(), 2496 CGM.Int64Ty->getPointerTo()}; 2497 auto *FnTy = 2498 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2499 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2500 break; 2501 } 2502 case OMPRTL__tgt_target_data_update: { 2503 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2504 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2505 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2506 CGM.Int32Ty, 2507 CGM.VoidPtrPtrTy, 2508 CGM.VoidPtrPtrTy, 2509 CGM.Int64Ty->getPointerTo(), 2510 CGM.Int64Ty->getPointerTo()}; 2511 auto *FnTy = 2512 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2513 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2514 break; 2515 } 2516 case OMPRTL__tgt_target_data_update_nowait: { 2517 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2518 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2519 // *arg_types); 2520 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2521 CGM.Int32Ty, 2522 CGM.VoidPtrPtrTy, 2523 CGM.VoidPtrPtrTy, 2524 CGM.Int64Ty->getPointerTo(), 2525 CGM.Int64Ty->getPointerTo()}; 2526 auto *FnTy = 2527 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2528 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2529 break; 2530 } 2531 case OMPRTL__tgt_mapper_num_components: { 2532 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2533 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2534 auto *FnTy = 2535 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2536 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2537 break; 2538 } 2539 case OMPRTL__tgt_push_mapper_component: { 2540 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2541 // *base, void *begin, 
int64_t size, int64_t type); 2542 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2543 CGM.Int64Ty, CGM.Int64Ty}; 2544 auto *FnTy = 2545 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2546 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2547 break; 2548 } 2549 case OMPRTL__kmpc_task_allow_completion_event: { 2550 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 2551 // int gtid, kmp_task_t *task); 2552 auto *FnTy = llvm::FunctionType::get( 2553 CGM.VoidPtrTy, {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy}, 2554 /*isVarArg=*/false); 2555 RTLFn = 2556 CGM.CreateRuntimeFunction(FnTy, "__kmpc_task_allow_completion_event"); 2557 break; 2558 } 2559 } 2560 assert(RTLFn && "Unable to find OpenMP runtime function"); 2561 return RTLFn; 2562 } 2563 2564 llvm::FunctionCallee 2565 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2566 assert((IVSize == 32 || IVSize == 64) && 2567 "IV size is not compatible with the omp runtime"); 2568 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2569 : "__kmpc_for_static_init_4u") 2570 : (IVSigned ? "__kmpc_for_static_init_8" 2571 : "__kmpc_for_static_init_8u"); 2572 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2573 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2574 llvm::Type *TypeParams[] = { 2575 getIdentTyPointerTy(), // loc 2576 CGM.Int32Ty, // tid 2577 CGM.Int32Ty, // schedtype 2578 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2579 PtrTy, // p_lower 2580 PtrTy, // p_upper 2581 PtrTy, // p_stride 2582 ITy, // incr 2583 ITy // chunk 2584 }; 2585 auto *FnTy = 2586 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2587 return CGM.CreateRuntimeFunction(FnTy, Name); 2588 } 2589 2590 llvm::FunctionCallee 2591 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2592 assert((IVSize == 32 || IVSize == 64) && 2593 "IV size is not compatible with the omp runtime"); 2594 StringRef Name = 2595 IVSize == 32 2596 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2597 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2598 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2599 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2600 CGM.Int32Ty, // tid 2601 CGM.Int32Ty, // schedtype 2602 ITy, // lower 2603 ITy, // upper 2604 ITy, // stride 2605 ITy // chunk 2606 }; 2607 auto *FnTy = 2608 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2609 return CGM.CreateRuntimeFunction(FnTy, Name); 2610 } 2611 2612 llvm::FunctionCallee 2613 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2614 assert((IVSize == 32 || IVSize == 64) && 2615 "IV size is not compatible with the omp runtime"); 2616 StringRef Name = 2617 IVSize == 32 2618 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2619 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2620 llvm::Type *TypeParams[] = { 2621 getIdentTyPointerTy(), // loc 2622 CGM.Int32Ty, // tid 2623 }; 2624 auto *FnTy = 2625 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2626 return CGM.CreateRuntimeFunction(FnTy, Name); 2627 } 2628 2629 llvm::FunctionCallee 2630 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2631 assert((IVSize == 32 || IVSize == 64) && 2632 "IV size is not compatible with the omp runtime"); 2633 StringRef Name = 2634 IVSize == 32 2635 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2636 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2637 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2638 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2639 llvm::Type *TypeParams[] = { 2640 getIdentTyPointerTy(), // loc 2641 CGM.Int32Ty, // tid 2642 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2643 PtrTy, // p_lower 2644 PtrTy, // p_upper 2645 PtrTy // p_stride 2646 }; 2647 auto *FnTy = 2648 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2649 return CGM.CreateRuntimeFunction(FnTy, Name); 2650 } 2651 2652 /// Obtain information that uniquely identifies a target entry. This 2653 /// consists of the file and device IDs as well as line number associated with 2654 /// the relevant entry source location. 
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  // The presumed location supplies both the file name used for the unique-ID
  // query below and the line number written to LineNum.
  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  // Ask the file system for the unique ID of the presumed file. A failure is
  // reported as a diagnostic, but execution continues and the outputs below
  // are still written from whatever ID holds at that point.
  // NOTE(review): presumably the reported error stops compilation before the
  // resulting IDs matter - confirm.
  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Returns the address of the "<mangled-name>[_<file-id>]_decl_tgt_ref_ptr"
/// reference pointer for a declare-target variable.
///
/// A valid address is produced only when \p VD is a declare-target declaration
/// mapped as 'link', or mapped as 'to' while HasRequiresUnifiedSharedMemory is
/// set. In OpenMPSimd mode, and for every other variable, Address::invalid()
/// is returned.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Build the name of the reference pointer. Variables that are not
    // externally visible additionally get the hexadecimal file ID of their
    // canonical declaration appended to the mangled name.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    // Reuse the pointer if the module already has a value with that name;
    // otherwise create it, give it weak linkage and register it.
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // Only the non-device (host) compilation initializes the pointer with
      // the address of the variable itself.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Returns the internal variable of type Int8PtrPtrTy that is passed as the
/// cache argument of __kmpc_threadprivate_cached for \p VD, creating it on
/// first use. Its name is the mangled name of \p VD followed by the suffix
/// produced by getName({"cache", ""}). Must not be called when TLS is both
/// requested and supported by the target (asserted).
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Returns the address to use for the threadprivate variable \p VD.
///
/// When TLS is requested and supported by the target, \p VDAddr is returned
/// unchanged. Otherwise a call to the runtime function identified by
/// OMPRTL__kmpc_threadprivate_cached is emitted and its result is returned with
/// the alignment of \p VDAddr.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Arguments: location, thread id, the variable's address cast to i8*, the
  // store size of the variable's type, and the per-variable cache.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
                 VDAddr.getAlignment());
}

/// Emits, into \p CGF, the two runtime calls that register a threadprivate
/// variable: one through OMPRTL__kmpc_global_thread_num and one through
/// OMPRTL__kmpc_threadprivate_register, the latter receiving the variable's
/// address (cast to void*) together with \p Ctor, \p CopyCtor and \p Dtor.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// Emits the constructor/destructor helper functions for the threadprivate
/// variable \p VD and registers them with the runtime.
///
/// Nothing is emitted (and nullptr is returned) when TLS is requested and
/// supported, when \p VD has no definition, when the definition has already
/// been handled (tracked in ThreadPrivateWithDefinition), or when neither a
/// constructor nor a destructor helper is needed.
///
/// \param PerformInit When true and compiling C++, a helper that re-emits the
///        variable's initializer is generated.
/// \param CGF When non-null, the registration calls are emitted directly into
///        this function and nullptr is returned. When null, a new
///        "__omp_threadprivate_init_" function holding the registration calls
///        is created and returned.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // insert(...).second is true only the first time this mangled name is seen,
  // so each variable is processed at most once.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit implies the variable has an initializer -
      // confirm against callers.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the void* argument and view it as a pointer to the variable's
      // type so the initializer can be emitted into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns its argument: reload it and store it into the
      // function's return slot.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Whichever of Ctor/Dtor was not generated is passed as a typed null
    // function pointer.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    // Without a caller-provided function, wrap the registration calls in a
    // dedicated nullary init function and hand it back to the caller.
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emits and registers the offload constructor/destructor entries for the
/// declare-target variable \p VD whose global is \p Addr.
///
/// Returns false immediately when no offload target triples are configured and
/// this is not a device compilation. In every other case the return value is
/// CGM.getLangOpts().OpenMPIsDevice. Variables that are not declare-target,
/// that are mapped 'link', or that are mapped 'to' under
/// HasRequiresUnifiedSharedMemory are skipped, as are variables already
/// recorded in DeclareTargetWithDefinition.
///
/// In a device compilation real "_ctor"/"_dtor" functions are generated; in a
/// host compilation private constant i8 placeholder globals with the same
/// names are created instead. Either way the entry is registered with
/// OffloadEntriesInfoManager.
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Process each variable (keyed by mangled name) only once.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  // Buffer receives "__omp_offloading_" "_<device-id>" "_<file-id>_" <name>
  // "_l" <line>, with both IDs printed in hexadecimal. Out is scratch storage
  // for the "_ctor"/"_dtor" entry names built below.
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the global at Addr.
      // NOTE(review): Init is dereferenced below without a null check;
      // presumably PerformInit implies the variable has an initializer -
      // confirm against callers.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: a private constant i8 global stands in for the
      // constructor and also serves as the entry ID.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the global at Addr.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: a private constant i8 global stands in for the destructor
      // and also serves as the entry ID.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Returns the address of a compiler-generated threadprivate variable of type
/// \p VarType identified by \p Name.
///
/// The backing storage is the internal variable named \p Name followed by the
/// suffix from getName({"artificial", ""}). When OpenMP TLS is enabled and the
/// target supports TLS, that global is marked thread-local and returned
/// directly. Otherwise a call through OMPRTL__kmpc_threadprivate_cached is
/// emitted, using a second internal variable (same name plus the "cache"
/// suffix) as the cache, and the call result cast to a pointer to \p VarType
/// is returned.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  // Arguments: location and thread id (both derived from an empty
  // SourceLocation), the global as void*, the size of VarType converted to
  // size_t, and the cache variable.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emits code for an OpenMP 'if' clause: \p ThenGen is run when \p Cond holds
/// and \p ElseGen otherwise.
///
/// If \p Cond constant-folds, only the selected generator is invoked and no
/// branch is emitted. Otherwise an "omp_if.then" / "omp_if.else" /
/// "omp_if.end" diamond is emitted with both generators invoked in their
/// respective blocks.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty is discarded at the end
  // of this statement. If ApplyDebugLocation restores the previous location
  // in its destructor, this has no lasting effect on the code emitted below;
  // other code in this file keeps the object in a named local
  // ("auto NL = ..."). Confirm whether that was intended here.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): same discarded-temporary concern as above.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emits the call that runs \p OutlinedFn as a parallel region.
///
/// Without an 'if' clause (\p IfCond is null) the region is always forked via
/// the runtime function identified by OMPRTL__kmpc_fork_call. With an 'if'
/// clause, emitIfClause selects between that fork and a serialized execution
/// that calls \p OutlinedFn directly between the serialized-parallel begin/end
/// runtime calls. Does nothing when \p CGF has no insert point.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  // Parallel path: hand the outlined function and the captured variables to
  // the fork call.
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: call the outlined function directly on the current
  // thread, bracketed by the serialized-parallel runtime calls.
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    // NOTE(review): the first argument pushed below is the address returned
    // by emitThreadIDAddress, not a zero constant; only the bound argument is
    // a zero-initialized temporary. The comment above may be stale - confirm.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
3132 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3133 SourceLocation Loc) { 3134 if (auto *OMPRegionInfo = 3135 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3136 if (OMPRegionInfo->getThreadIDVariable()) 3137 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 3138 3139 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3140 QualType Int32Ty = 3141 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3142 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3143 CGF.EmitStoreOfScalar(ThreadID, 3144 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3145 3146 return ThreadIDTemp; 3147 } 3148 3149 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3150 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3151 SmallString<256> Buffer; 3152 llvm::raw_svector_ostream Out(Buffer); 3153 Out << Name; 3154 StringRef RuntimeName = Out.str(); 3155 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3156 if (Elem.second) { 3157 assert(Elem.second->getType()->getPointerElementType() == Ty && 3158 "OMP internal variable has different type than requested"); 3159 return &*Elem.second; 3160 } 3161 3162 return Elem.second = new llvm::GlobalVariable( 3163 CGM.getModule(), Ty, /*IsConstant*/ false, 3164 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3165 Elem.first(), /*InsertBefore=*/nullptr, 3166 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3167 } 3168 3169 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3170 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3171 std::string Name = getName({Prefix, "var"}); 3172 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3173 } 3174 3175 namespace { 3176 /// Common pre(post)-action for different OpenMP constructs. 
3177 class CommonActionTy final : public PrePostActionTy { 3178 llvm::FunctionCallee EnterCallee; 3179 ArrayRef<llvm::Value *> EnterArgs; 3180 llvm::FunctionCallee ExitCallee; 3181 ArrayRef<llvm::Value *> ExitArgs; 3182 bool Conditional; 3183 llvm::BasicBlock *ContBlock = nullptr; 3184 3185 public: 3186 CommonActionTy(llvm::FunctionCallee EnterCallee, 3187 ArrayRef<llvm::Value *> EnterArgs, 3188 llvm::FunctionCallee ExitCallee, 3189 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3190 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3191 ExitArgs(ExitArgs), Conditional(Conditional) {} 3192 void Enter(CodeGenFunction &CGF) override { 3193 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3194 if (Conditional) { 3195 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3196 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3197 ContBlock = CGF.createBasicBlock("omp_if.end"); 3198 // Generate the branch (If-stmt) 3199 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3200 CGF.EmitBlock(ThenBlock); 3201 } 3202 } 3203 void Done(CodeGenFunction &CGF) { 3204 // Emit the rest of blocks/branches 3205 CGF.EmitBranch(ContBlock); 3206 CGF.EmitBlock(ContBlock, true); 3207 } 3208 void Exit(CodeGenFunction &CGF) override { 3209 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3210 } 3211 }; 3212 } // anonymous namespace 3213 3214 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3215 StringRef CriticalName, 3216 const RegionCodeGenTy &CriticalOpGen, 3217 SourceLocation Loc, const Expr *Hint) { 3218 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3219 // CriticalOpGen(); 3220 // __kmpc_end_critical(ident_t *, gtid, Lock); 3221 // Prepare arguments and build a call to __kmpc_critical 3222 if (!CGF.HaveInsertPoint()) 3223 return; 3224 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3225 getCriticalRegionLock(CriticalName)}; 3226 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3227 std::end(Args)); 3228 if (Hint) { 3229 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3230 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3231 } 3232 CommonActionTy Action( 3233 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3234 : OMPRTL__kmpc_critical), 3235 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3236 CriticalOpGen.setAction(Action); 3237 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3238 } 3239 3240 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3241 const RegionCodeGenTy &MasterOpGen, 3242 SourceLocation Loc) { 3243 if (!CGF.HaveInsertPoint()) 3244 return; 3245 // if(__kmpc_master(ident_t *, gtid)) { 3246 // MasterOpGen(); 3247 // __kmpc_end_master(ident_t *, gtid); 3248 // } 3249 // Prepare arguments and build a call to __kmpc_master 3250 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3251 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3252 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3253 /*Conditional=*/true); 3254 MasterOpGen.setAction(Action); 3255 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3256 Action.Done(CGF); 3257 } 3258 3259 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3260 SourceLocation Loc) { 3261 if (!CGF.HaveInsertPoint()) 3262 return; 3263 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 3264 if (OMPBuilder) { 3265 OMPBuilder->CreateTaskyield(CGF.Builder); 3266 } else { 3267 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3268 llvm::Value *Args[] = { 3269 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3270 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3271 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), 3272 Args); 3273 } 3274 3275 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3276 
Region->emitUntiedSwitch(CGF); 3277 } 3278 3279 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3280 const RegionCodeGenTy &TaskgroupOpGen, 3281 SourceLocation Loc) { 3282 if (!CGF.HaveInsertPoint()) 3283 return; 3284 // __kmpc_taskgroup(ident_t *, gtid); 3285 // TaskgroupOpGen(); 3286 // __kmpc_end_taskgroup(ident_t *, gtid); 3287 // Prepare arguments and build a call to __kmpc_taskgroup 3288 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3289 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3290 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3291 Args); 3292 TaskgroupOpGen.setAction(Action); 3293 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3294 } 3295 3296 /// Given an array of pointers to variables, project the address of a 3297 /// given variable. 3298 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3299 unsigned Index, const VarDecl *Var) { 3300 // Pull out the pointer to the variable. 
3301 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3302 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3303 3304 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3305 Addr = CGF.Builder.CreateElementBitCast( 3306 Addr, CGF.ConvertTypeForMem(Var->getType())); 3307 return Addr; 3308 } 3309 3310 static llvm::Value *emitCopyprivateCopyFunction( 3311 CodeGenModule &CGM, llvm::Type *ArgsType, 3312 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3313 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3314 SourceLocation Loc) { 3315 ASTContext &C = CGM.getContext(); 3316 // void copy_func(void *LHSArg, void *RHSArg); 3317 FunctionArgList Args; 3318 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3319 ImplicitParamDecl::Other); 3320 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3321 ImplicitParamDecl::Other); 3322 Args.push_back(&LHSArg); 3323 Args.push_back(&RHSArg); 3324 const auto &CGFI = 3325 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3326 std::string Name = 3327 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3328 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3329 llvm::GlobalValue::InternalLinkage, Name, 3330 &CGM.getModule()); 3331 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3332 Fn->setDoesNotRecurse(); 3333 CodeGenFunction CGF(CGM); 3334 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3335 // Dest = (void*[n])(LHSArg); 3336 // Src = (void*[n])(RHSArg); 3337 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3338 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3339 ArgsType), CGF.getPointerAlign()); 3340 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3341 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3342 ArgsType), CGF.getPointerAlign()); 3343 // *(Type0*)Dst[0] = 
*(Type0*)Src[0]; 3344 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3345 // ... 3346 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3347 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3348 const auto *DestVar = 3349 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3350 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3351 3352 const auto *SrcVar = 3353 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3354 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3355 3356 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3357 QualType Type = VD->getType(); 3358 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3359 } 3360 CGF.FinishFunction(); 3361 return Fn; 3362 } 3363 3364 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3365 const RegionCodeGenTy &SingleOpGen, 3366 SourceLocation Loc, 3367 ArrayRef<const Expr *> CopyprivateVars, 3368 ArrayRef<const Expr *> SrcExprs, 3369 ArrayRef<const Expr *> DstExprs, 3370 ArrayRef<const Expr *> AssignmentOps) { 3371 if (!CGF.HaveInsertPoint()) 3372 return; 3373 assert(CopyprivateVars.size() == SrcExprs.size() && 3374 CopyprivateVars.size() == DstExprs.size() && 3375 CopyprivateVars.size() == AssignmentOps.size()); 3376 ASTContext &C = CGM.getContext(); 3377 // int32 did_it = 0; 3378 // if(__kmpc_single(ident_t *, gtid)) { 3379 // SingleOpGen(); 3380 // __kmpc_end_single(ident_t *, gtid); 3381 // did_it = 1; 3382 // } 3383 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3384 // <copy_func>, did_it); 3385 3386 Address DidIt = Address::invalid(); 3387 if (!CopyprivateVars.empty()) { 3388 // int32 did_it = 0; 3389 QualType KmpInt32Ty = 3390 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3391 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3392 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3393 } 3394 // Prepare arguments and build a call to __kmpc_single 3395 llvm::Value 
*Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3396 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3397 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3398 /*Conditional=*/true); 3399 SingleOpGen.setAction(Action); 3400 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3401 if (DidIt.isValid()) { 3402 // did_it = 1; 3403 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3404 } 3405 Action.Done(CGF); 3406 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3407 // <copy_func>, did_it); 3408 if (DidIt.isValid()) { 3409 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3410 QualType CopyprivateArrayTy = C.getConstantArrayType( 3411 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 3412 /*IndexTypeQuals=*/0); 3413 // Create a list of all private variables for copyprivate. 3414 Address CopyprivateList = 3415 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3416 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3417 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3418 CGF.Builder.CreateStore( 3419 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3420 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 3421 CGF.VoidPtrTy), 3422 Elem); 3423 } 3424 // Build function that copies private values from single region to all other 3425 // threads in the corresponding parallel region. 
3426 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3427 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3428 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3429 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3430 Address CL = 3431 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3432 CGF.VoidPtrTy); 3433 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3434 llvm::Value *Args[] = { 3435 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3436 getThreadID(CGF, Loc), // i32 <gtid> 3437 BufSize, // size_t <buf_size> 3438 CL.getPointer(), // void *<copyprivate list> 3439 CpyFn, // void (*) (void *, void *) <copy_func> 3440 DidItVal // i32 did_it 3441 }; 3442 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3443 } 3444 } 3445 3446 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3447 const RegionCodeGenTy &OrderedOpGen, 3448 SourceLocation Loc, bool IsThreads) { 3449 if (!CGF.HaveInsertPoint()) 3450 return; 3451 // __kmpc_ordered(ident_t *, gtid); 3452 // OrderedOpGen(); 3453 // __kmpc_end_ordered(ident_t *, gtid); 3454 // Prepare arguments and build a call to __kmpc_ordered 3455 if (IsThreads) { 3456 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3457 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3458 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3459 Args); 3460 OrderedOpGen.setAction(Action); 3461 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3462 return; 3463 } 3464 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3465 } 3466 3467 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3468 unsigned Flags; 3469 if (Kind == OMPD_for) 3470 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3471 else if (Kind == OMPD_sections) 3472 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3473 else if (Kind == OMPD_single) 3474 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3475 else if (Kind == OMPD_barrier) 
3476 Flags = OMP_IDENT_BARRIER_EXPL; 3477 else 3478 Flags = OMP_IDENT_BARRIER_IMPL; 3479 return Flags; 3480 } 3481 3482 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3483 CodeGenFunction &CGF, const OMPLoopDirective &S, 3484 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3485 // Check if the loop directive is actually a doacross loop directive. In this 3486 // case choose static, 1 schedule. 3487 if (llvm::any_of( 3488 S.getClausesOfKind<OMPOrderedClause>(), 3489 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3490 ScheduleKind = OMPC_SCHEDULE_static; 3491 // Chunk size is 1 in this case. 3492 llvm::APInt ChunkSize(32, 1); 3493 ChunkExpr = IntegerLiteral::Create( 3494 CGF.getContext(), ChunkSize, 3495 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3496 SourceLocation()); 3497 } 3498 } 3499 3500 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3501 OpenMPDirectiveKind Kind, bool EmitChecks, 3502 bool ForceSimpleCall) { 3503 // Check if we should use the OMPBuilder 3504 auto *OMPRegionInfo = 3505 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 3506 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 3507 if (OMPBuilder) { 3508 CGF.Builder.restoreIP(OMPBuilder->CreateBarrier( 3509 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 3510 return; 3511 } 3512 3513 if (!CGF.HaveInsertPoint()) 3514 return; 3515 // Build call __kmpc_cancel_barrier(loc, thread_id); 3516 // Build call __kmpc_barrier(loc, thread_id); 3517 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3518 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3519 // thread_id); 3520 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3521 getThreadID(CGF, Loc)}; 3522 if (OMPRegionInfo) { 3523 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3524 llvm::Value *Result = CGF.EmitRuntimeCall( 3525 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3526 if 
(EmitChecks) { 3527 // if (__kmpc_cancel_barrier()) { 3528 // exit from construct; 3529 // } 3530 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3531 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3532 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3533 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3534 CGF.EmitBlock(ExitBB); 3535 // exit from construct; 3536 CodeGenFunction::JumpDest CancelDestination = 3537 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3538 CGF.EmitBranchThroughCleanup(CancelDestination); 3539 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3540 } 3541 return; 3542 } 3543 } 3544 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3545 } 3546 3547 /// Map the OpenMP loop schedule to the runtime enumeration. 3548 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3549 bool Chunked, bool Ordered) { 3550 switch (ScheduleKind) { 3551 case OMPC_SCHEDULE_static: 3552 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3553 : (Ordered ? OMP_ord_static : OMP_sch_static); 3554 case OMPC_SCHEDULE_dynamic: 3555 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3556 case OMPC_SCHEDULE_guided: 3557 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3558 case OMPC_SCHEDULE_runtime: 3559 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3560 case OMPC_SCHEDULE_auto: 3561 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3562 case OMPC_SCHEDULE_unknown: 3563 assert(!Chunked && "chunk was specified but schedule kind not known"); 3564 return Ordered ? OMP_ord_static : OMP_sch_static; 3565 } 3566 llvm_unreachable("Unexpected runtime schedule"); 3567 } 3568 3569 /// Map the OpenMP distribute schedule to the runtime enumeration. 
3570 static OpenMPSchedType 3571 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3572 // only static is allowed for dist_schedule 3573 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3574 } 3575 3576 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3577 bool Chunked) const { 3578 OpenMPSchedType Schedule = 3579 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3580 return Schedule == OMP_sch_static; 3581 } 3582 3583 bool CGOpenMPRuntime::isStaticNonchunked( 3584 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3585 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3586 return Schedule == OMP_dist_sch_static; 3587 } 3588 3589 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3590 bool Chunked) const { 3591 OpenMPSchedType Schedule = 3592 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3593 return Schedule == OMP_sch_static_chunked; 3594 } 3595 3596 bool CGOpenMPRuntime::isStaticChunked( 3597 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3598 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3599 return Schedule == OMP_dist_sch_static_chunked; 3600 } 3601 3602 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3603 OpenMPSchedType Schedule = 3604 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3605 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3606 return Schedule != OMP_sch_static; 3607 } 3608 3609 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 3610 OpenMPScheduleClauseModifier M1, 3611 OpenMPScheduleClauseModifier M2) { 3612 int Modifier = 0; 3613 switch (M1) { 3614 case OMPC_SCHEDULE_MODIFIER_monotonic: 3615 Modifier = OMP_sch_modifier_monotonic; 3616 break; 3617 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3618 Modifier = OMP_sch_modifier_nonmonotonic; 
3619 break; 3620 case OMPC_SCHEDULE_MODIFIER_simd: 3621 if (Schedule == OMP_sch_static_chunked) 3622 Schedule = OMP_sch_static_balanced_chunked; 3623 break; 3624 case OMPC_SCHEDULE_MODIFIER_last: 3625 case OMPC_SCHEDULE_MODIFIER_unknown: 3626 break; 3627 } 3628 switch (M2) { 3629 case OMPC_SCHEDULE_MODIFIER_monotonic: 3630 Modifier = OMP_sch_modifier_monotonic; 3631 break; 3632 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3633 Modifier = OMP_sch_modifier_nonmonotonic; 3634 break; 3635 case OMPC_SCHEDULE_MODIFIER_simd: 3636 if (Schedule == OMP_sch_static_chunked) 3637 Schedule = OMP_sch_static_balanced_chunked; 3638 break; 3639 case OMPC_SCHEDULE_MODIFIER_last: 3640 case OMPC_SCHEDULE_MODIFIER_unknown: 3641 break; 3642 } 3643 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 3644 // If the static schedule kind is specified or if the ordered clause is 3645 // specified, and if the nonmonotonic modifier is not specified, the effect is 3646 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 3647 // modifier is specified, the effect is as if the nonmonotonic modifier is 3648 // specified. 
3649 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 3650 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 3651 Schedule == OMP_sch_static_balanced_chunked || 3652 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 3653 Schedule == OMP_dist_sch_static_chunked || 3654 Schedule == OMP_dist_sch_static)) 3655 Modifier = OMP_sch_modifier_nonmonotonic; 3656 } 3657 return Schedule | Modifier; 3658 } 3659 3660 void CGOpenMPRuntime::emitForDispatchInit( 3661 CodeGenFunction &CGF, SourceLocation Loc, 3662 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3663 bool Ordered, const DispatchRTInput &DispatchValues) { 3664 if (!CGF.HaveInsertPoint()) 3665 return; 3666 OpenMPSchedType Schedule = getRuntimeSchedule( 3667 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3668 assert(Ordered || 3669 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3670 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3671 Schedule != OMP_sch_static_balanced_chunked)); 3672 // Call __kmpc_dispatch_init( 3673 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3674 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3675 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3676 3677 // If the Chunk was not specified in the clause - use default value 1. 3678 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3679 : CGF.Builder.getIntN(IVSize, 1); 3680 llvm::Value *Args[] = { 3681 emitUpdateLocation(CGF, Loc), 3682 getThreadID(CGF, Loc), 3683 CGF.Builder.getInt32(addMonoNonMonoModifier( 3684 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3685 DispatchValues.LB, // Lower 3686 DispatchValues.UB, // Upper 3687 CGF.Builder.getIntN(IVSize, 1), // Stride 3688 Chunk // Chunk 3689 }; 3690 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3691 } 3692 3693 static void emitForStaticInitCall( 3694 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3695 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3696 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3697 const CGOpenMPRuntime::StaticRTInput &Values) { 3698 if (!CGF.HaveInsertPoint()) 3699 return; 3700 3701 assert(!Values.Ordered); 3702 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3703 Schedule == OMP_sch_static_balanced_chunked || 3704 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3705 Schedule == OMP_dist_sch_static || 3706 Schedule == OMP_dist_sch_static_chunked); 3707 3708 // Call __kmpc_for_static_init( 3709 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3710 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3711 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3712 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3713 llvm::Value *Chunk = Values.Chunk; 3714 if (Chunk == nullptr) { 3715 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3716 Schedule == OMP_dist_sch_static) && 3717 "expected static non-chunked schedule"); 3718 // If the Chunk was not specified in the clause - use default value 1. 
3719 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3720 } else { 3721 assert((Schedule == OMP_sch_static_chunked || 3722 Schedule == OMP_sch_static_balanced_chunked || 3723 Schedule == OMP_ord_static_chunked || 3724 Schedule == OMP_dist_sch_static_chunked) && 3725 "expected static chunked schedule"); 3726 } 3727 llvm::Value *Args[] = { 3728 UpdateLocation, 3729 ThreadId, 3730 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 3731 M2)), // Schedule type 3732 Values.IL.getPointer(), // &isLastIter 3733 Values.LB.getPointer(), // &LB 3734 Values.UB.getPointer(), // &UB 3735 Values.ST.getPointer(), // &Stride 3736 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3737 Chunk // Chunk 3738 }; 3739 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3740 } 3741 3742 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3743 SourceLocation Loc, 3744 OpenMPDirectiveKind DKind, 3745 const OpenMPScheduleTy &ScheduleKind, 3746 const StaticRTInput &Values) { 3747 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3748 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3749 assert(isOpenMPWorksharingDirective(DKind) && 3750 "Expected loop-based or sections-based directive."); 3751 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3752 isOpenMPLoopDirective(DKind) 3753 ? 
OMP_IDENT_WORK_LOOP 3754 : OMP_IDENT_WORK_SECTIONS); 3755 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3756 llvm::FunctionCallee StaticInitFunction = 3757 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3758 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 3759 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3760 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3761 } 3762 3763 void CGOpenMPRuntime::emitDistributeStaticInit( 3764 CodeGenFunction &CGF, SourceLocation Loc, 3765 OpenMPDistScheduleClauseKind SchedKind, 3766 const CGOpenMPRuntime::StaticRTInput &Values) { 3767 OpenMPSchedType ScheduleNum = 3768 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3769 llvm::Value *UpdatedLocation = 3770 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3771 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3772 llvm::FunctionCallee StaticInitFunction = 3773 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3774 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3775 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3776 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3777 } 3778 3779 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3780 SourceLocation Loc, 3781 OpenMPDirectiveKind DKind) { 3782 if (!CGF.HaveInsertPoint()) 3783 return; 3784 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3785 llvm::Value *Args[] = { 3786 emitUpdateLocation(CGF, Loc, 3787 isOpenMPDistributeDirective(DKind) 3788 ? OMP_IDENT_WORK_DISTRIBUTE 3789 : isOpenMPLoopDirective(DKind) 3790 ? 
OMP_IDENT_WORK_LOOP 3791 : OMP_IDENT_WORK_SECTIONS), 3792 getThreadID(CGF, Loc)}; 3793 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 3794 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3795 Args); 3796 } 3797 3798 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3799 SourceLocation Loc, 3800 unsigned IVSize, 3801 bool IVSigned) { 3802 if (!CGF.HaveInsertPoint()) 3803 return; 3804 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3805 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3806 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3807 } 3808 3809 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3810 SourceLocation Loc, unsigned IVSize, 3811 bool IVSigned, Address IL, 3812 Address LB, Address UB, 3813 Address ST) { 3814 // Call __kmpc_dispatch_next( 3815 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3816 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3817 // kmp_int[32|64] *p_stride); 3818 llvm::Value *Args[] = { 3819 emitUpdateLocation(CGF, Loc), 3820 getThreadID(CGF, Loc), 3821 IL.getPointer(), // &isLastIter 3822 LB.getPointer(), // &Lower 3823 UB.getPointer(), // &Upper 3824 ST.getPointer() // &Stride 3825 }; 3826 llvm::Value *Call = 3827 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3828 return CGF.EmitScalarConversion( 3829 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3830 CGF.getContext().BoolTy, Loc); 3831 } 3832 3833 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3834 llvm::Value *NumThreads, 3835 SourceLocation Loc) { 3836 if (!CGF.HaveInsertPoint()) 3837 return; 3838 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3839 llvm::Value *Args[] = { 3840 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3841 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3842 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// Emit a call to __kmpc_push_proc_bind(&loc, global_tid, proc_bind).
///
/// Nothing is emitted when \p CGF has no insertion point. \p ProcBind must not
/// be OMP_PROC_BIND_unknown (asserted).
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// Emit a flush.
///
/// When the module provides an OpenMPIRBuilder the flush is created through
/// it; otherwise a call to __kmpc_flush(&loc) is emitted, provided \p CGF has
/// an insertion point. The (unnamed) list of flushed expressions and \p AO are
/// not consulted by this implementation.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    // NOTE(review): this path does not check CGF.HaveInsertPoint(), unlike the
    // runtime-call path below - confirm that is intentional.
    OMPBuilder->CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
///
/// The enumerators are used as positional offsets from field_begin() of the
/// kmp_task_t record, so their order has to stay in sync with the order in
/// which createKmpTaskTRecordDecl() adds the fields.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// Return true if neither target region entries nor device global variable
/// entries have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
///
/// Stores a placeholder entry (null address and ID, kind
/// OMPTargetRegionEntryTargetRegion) with the given \p Order under the key
/// (DeviceID, FileID, ParentName, LineNum) and increments the entry counter.
/// Only valid during device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register the address, ID and flags of a target region entry.
///
/// For device code generation the entry must already have been initialized
/// and must not be registered yet; otherwise an error diagnostic is reported
/// and nothing is registered. For host code generation a new entry is created
/// whose order is the current number of offloading entries.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true if an entry exists for (DeviceID, FileID, ParentName, LineNum)
/// and that entry has neither an address nor an ID set yet.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  // Walk the nested maps one level at a time using find(), so that a lookup
  // never inserts anything.
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

/// Invoke \p Action on every recorded target region entry, passing the device
/// ID, file ID, parent function name, line number and the entry itself.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Initialize a device global variable entry named \p Name with the given
/// \p Flags and \p Order. Only valid during device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  // The entry counter is incremented regardless of the try_emplace() result.
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register address, size, flags and linkage of a declare target variable.
///
/// An entry that already has an address is not re-registered; only a zero
/// size (and the linkage along with it) is filled in. On the host a missing
/// entry is created with the next order number.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // NOTE(review): operator[] would insert a default-constructed entry for an
    // unknown name; presumably such an entry fails isValid() and is caught by
    // the assert below - confirm for builds without assertions.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already registered: only complete a size that is still zero.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      // Already registered: only complete a size that is still zero.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action on every recorded device global variable entry, passing
/// the entry's key (the variable name) and the entry itself.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Create one __tgt_offload_entry global for the entity \p Addr.
///
/// Emits an internal constant string holding the name of \p Addr and a
/// constant struct {ID, name, Size, Flags, 0} placed in the
/// "omp_offloading_entries" section. The struct is always created with
/// WeakAnyLinkage; \p Linkage is not used by this implementation.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Field order follows the record built by getTgtOffloadEntryQTy():
  // addr, name, size, flags, reserved.
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

/// Emit the "omp_offload.info" named metadata and the offload entry globals
/// for all recorded target regions and declare target variables.
///
/// Entries are first collected by their creation order and then emitted in
/// that order. Nothing is emitted in simd-only mode or when no entries exist.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by the entry order. Each OrderedEntries element
  // holds (entry, source location, parent function or variable name).
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Find the file with matching device/file unique ID to build a source
        // location (column 1 of Line) for diagnostics. Loc stays invalid when
        // no file matches.
        // Note: the end iterator `E` below shadows the entry parameter `E`,
        // but only for the duration of this loop.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the offload entry globals in creation order.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      // Inside the switch, `continue` skips this entry (it applies to the
      // enclosing for loop) while `break` falls through to the
      // createOffloadEntry() call below.
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          // Unlike the 'to' case above, this diagnostic is reported without a
          // source location and without the variable name.
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
///
/// Does nothing unless compiling for the device with a host IR file set.
/// Failure to open or parse that file is reported as an error diagnostic.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // The host module is parsed into a function-local context, separate from
  // the context of the module being generated.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers reading operand Idx of the current node as integer / string.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands follow the layouts
    // documented in createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Build the kmp_routine_entry_t pointer type on first use and cache it in
/// KmpRoutineEntryPtrQTy (AST type) and KmpRoutineEntryPtrTy (IR type).
/// Later calls are no-ops.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// Return the type of __tgt_offload_entry, building the implicit (packed)
/// record on first use and caching it in TgtOffloadEntryQTy.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it is a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// Bundle of declarations describing one privatized variable of a task.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const Expr *OriginalRef = nullptr;
  // Its non-reference type is the type of the corresponding field in the
  // privates record (see createPrivatesRecordDecl()).
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
};
/// A privatized variable paired with an alignment.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Build the implicit record ".kmp_privates.t" with one field per element of
/// \p Privates, in the same order.
///
/// Each field has the non-reference type of the original variable and inherits
/// that variable's AlignedAttr attributes.
/// \return the record, or nullptr if \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /* private vars */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Propagate the alignment attributes of the variable to the field.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Build the implicit record "kmp_task_t" (and the union "kmp_cmplrdata_t"
/// used for its data1/data2 fields).
///
/// The five taskloop-only fields are appended only when \p Kind is a taskloop
/// directive. The field order must match the KmpTaskTFields enumeration.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a kmp_int32 and a routine entry pointer.
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Build the implicit record "kmp_task_t_with_privates": a kmp_task_t field
/// followed by the privates record. The second field is omitted when
/// \p Privates is empty.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
///   TaskFunction(gtid, &tt->task_data.part_id, &tt->privates,
///   task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->task_data.reductions,
///   For all tasks:
///   tt->task_data.shareds);
///   return 0;
/// }
/// \endcode
/// The privates argument is a null pointer when the task record has no
/// privates field. The function is created with internal linkage and marked
/// as non-recursive.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is *tt (the kmp_task_t_with_privates object); Base is its first
  // field, the embedded kmp_task_t.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // part_id is passed by address, not by value.
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  // Load the shareds pointer and cast it to the expected pointer type.
  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field of the task record; pass
  // its address as void*, or null if the field does not exist.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  // Arguments common to tasks and taskloops; the last one is tt as void*.
  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions, each
    // loaded by value from the kmp_task_t.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  // The shareds pointer is always the last argument.
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit an internal, non-recursive function taking (kmp_int32 gtid,
/// kmp_task_t_with_privates *tt) that destroys the private copies stored in
/// the task: a destroy cleanup is pushed for every field of the privates
/// record whose type needs destruction.
///
/// The privates record is taken from the second field of
/// \p KmpTaskTWithPrivatesQTy without checking that the field exists.
/// NOTE(review): presumably callers only emit this function for tasks that
/// have privates - confirm against the call site.
/// \return the emitted function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  // Base starts as *tt and is then narrowed to tt->privates.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Fields whose type needs no destruction are skipped.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
/// One output parameter is created per variable, in the order \p PrivateVars,
/// \p FirstprivateVars, \p LastprivateVars. Every expression in these lists
/// must be a DeclRefExpr naming a VarDecl (enforced by cast<>).
/// When optimizing, the function is marked always_inline.
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Maps each variable to the index of its output parameter in Args. Counting
  // starts at 1 because Args[0] is the privates record pointer.
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Replace noinline/optnone with alwaysinline in optimized builds.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  // Counter now indexes Privates. This relies on the i-th field of the
  // privates record corresponding to Privates[i] - presumably because the
  // record was built by createPrivatesRecordDecl() from the same array.
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    // Store the address of the field through the output parameter.
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
4717 static void emitPrivatesInit(CodeGenFunction &CGF, 4718 const OMPExecutableDirective &D, 4719 Address KmpTaskSharedsPtr, LValue TDBase, 4720 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4721 QualType SharedsTy, QualType SharedsPtrTy, 4722 const OMPTaskDataTy &Data, 4723 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4724 ASTContext &C = CGF.getContext(); 4725 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4726 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4727 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4728 ? OMPD_taskloop 4729 : OMPD_task; 4730 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4731 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4732 LValue SrcBase; 4733 bool IsTargetTask = 4734 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4735 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4736 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4737 // PointersArray and SizesArray. The original variables for these arrays are 4738 // not captured and we get their addresses explicitly. 
4739 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || 4740 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4741 SrcBase = CGF.MakeAddrLValue( 4742 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4743 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4744 SharedsTy); 4745 } 4746 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4747 for (const PrivateDataTy &Pair : Privates) { 4748 const VarDecl *VD = Pair.second.PrivateCopy; 4749 const Expr *Init = VD->getAnyInitializer(); 4750 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4751 !CGF.isTrivialInitializer(Init)))) { 4752 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4753 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4754 const VarDecl *OriginalVD = Pair.second.Original; 4755 // Check if the variable is the target-based BasePointersArray, 4756 // PointersArray or SizesArray. 4757 LValue SharedRefLValue; 4758 QualType Type = PrivateLValue.getType(); 4759 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4760 if (IsTargetTask && !SharedField) { 4761 assert(isa<ImplicitParamDecl>(OriginalVD) && 4762 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4763 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4764 ->getNumParams() == 0 && 4765 isa<TranslationUnitDecl>( 4766 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4767 ->getDeclContext()) && 4768 "Expected artificial target data variable."); 4769 SharedRefLValue = 4770 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4771 } else if (ForDup) { 4772 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4773 SharedRefLValue = CGF.MakeAddrLValue( 4774 Address(SharedRefLValue.getPointer(CGF), 4775 C.getDeclAlign(OriginalVD)), 4776 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4777 SharedRefLValue.getTBAAInfo()); 4778 } else { 4779 InlinedOpenMPRegionRAII Region( 4780 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, 4781 
/*HasCancel=*/false); 4782 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); 4783 } 4784 if (Type->isArrayType()) { 4785 // Initialize firstprivate array. 4786 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4787 // Perform simple memcpy. 4788 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4789 } else { 4790 // Initialize firstprivate array using element-by-element 4791 // initialization. 4792 CGF.EmitOMPAggregateAssign( 4793 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 4794 Type, 4795 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4796 Address SrcElement) { 4797 // Clean up any temporaries needed by the initialization. 4798 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4799 InitScope.addPrivate( 4800 Elem, [SrcElement]() -> Address { return SrcElement; }); 4801 (void)InitScope.Privatize(); 4802 // Emit initialization for single element. 4803 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4804 CGF, &CapturesInfo); 4805 CGF.EmitAnyExprToMem(Init, DestElement, 4806 Init->getType().getQualifiers(), 4807 /*IsInitializer=*/false); 4808 }); 4809 } 4810 } else { 4811 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4812 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 4813 return SharedRefLValue.getAddress(CGF); 4814 }); 4815 (void)InitScope.Privatize(); 4816 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4817 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4818 /*capturedByInit=*/false); 4819 } 4820 } else { 4821 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4822 } 4823 } 4824 ++FI; 4825 } 4826 } 4827 4828 /// Check if duplication function is required for taskloops. 
4829 static bool checkInitIsRequired(CodeGenFunction &CGF, 4830 ArrayRef<PrivateDataTy> Privates) { 4831 bool InitRequired = false; 4832 for (const PrivateDataTy &Pair : Privates) { 4833 const VarDecl *VD = Pair.second.PrivateCopy; 4834 const Expr *Init = VD->getAnyInitializer(); 4835 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4836 !CGF.isTrivialInitializer(Init)); 4837 if (InitRequired) 4838 break; 4839 } 4840 return InitRequired; 4841 } 4842 4843 4844 /// Emit task_dup function (for initialization of 4845 /// private/firstprivate/lastprivate vars and last_iter flag) 4846 /// \code 4847 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4848 /// lastpriv) { 4849 /// // setup lastprivate flag 4850 /// task_dst->last = lastpriv; 4851 /// // could be constructor calls here... 4852 /// } 4853 /// \endcode 4854 static llvm::Value * 4855 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4856 const OMPExecutableDirective &D, 4857 QualType KmpTaskTWithPrivatesPtrQTy, 4858 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4859 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4860 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4861 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4862 ASTContext &C = CGM.getContext(); 4863 FunctionArgList Args; 4864 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4865 KmpTaskTWithPrivatesPtrQTy, 4866 ImplicitParamDecl::Other); 4867 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4868 KmpTaskTWithPrivatesPtrQTy, 4869 ImplicitParamDecl::Other); 4870 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4871 ImplicitParamDecl::Other); 4872 Args.push_back(&DstArg); 4873 Args.push_back(&SrcArg); 4874 Args.push_back(&LastprivArg); 4875 const auto &TaskDupFnInfo = 4876 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4877 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4878 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4879 auto *TaskDup = llvm::Function::Create( 4880 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4881 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4882 TaskDup->setDoesNotRecurse(); 4883 CodeGenFunction CGF(CGM); 4884 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4885 Loc); 4886 4887 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4888 CGF.GetAddrOfLocalVar(&DstArg), 4889 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4890 // task_dst->liter = lastpriv; 4891 if (WithLastIter) { 4892 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4893 LValue Base = CGF.EmitLValueForField( 4894 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4895 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4896 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4897 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4898 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4899 } 4900 4901 // Emit initial values for private copies (if any). 4902 assert(!Privates.empty()); 4903 Address KmpTaskSharedsPtr = Address::invalid(); 4904 if (!Data.FirstprivateVars.empty()) { 4905 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4906 CGF.GetAddrOfLocalVar(&SrcArg), 4907 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4908 LValue Base = CGF.EmitLValueForField( 4909 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4910 KmpTaskSharedsPtr = Address( 4911 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4912 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4913 KmpTaskTShareds)), 4914 Loc), 4915 CGF.getNaturalTypeAlignment(SharedsTy)); 4916 } 4917 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4918 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4919 CGF.FinishFunction(); 4920 return TaskDup; 4921 } 4922 4923 /// Checks if destructor function is required to be generated. 
4924 /// \return true if cleanups are required, false otherwise. 4925 static bool 4926 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4927 bool NeedsCleanup = false; 4928 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4929 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4930 for (const FieldDecl *FD : PrivateRD->fields()) { 4931 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4932 if (NeedsCleanup) 4933 break; 4934 } 4935 return NeedsCleanup; 4936 } 4937 4938 CGOpenMPRuntime::TaskResultTy 4939 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4940 const OMPExecutableDirective &D, 4941 llvm::Function *TaskFunction, QualType SharedsTy, 4942 Address Shareds, const OMPTaskDataTy &Data) { 4943 ASTContext &C = CGM.getContext(); 4944 llvm::SmallVector<PrivateDataTy, 4> Privates; 4945 // Aggregate privates and sort them by the alignment. 4946 const auto *I = Data.PrivateCopies.begin(); 4947 for (const Expr *E : Data.PrivateVars) { 4948 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4949 Privates.emplace_back( 4950 C.getDeclAlign(VD), 4951 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4952 /*PrivateElemInit=*/nullptr)); 4953 ++I; 4954 } 4955 I = Data.FirstprivateCopies.begin(); 4956 const auto *IElemInitRef = Data.FirstprivateInits.begin(); 4957 for (const Expr *E : Data.FirstprivateVars) { 4958 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4959 Privates.emplace_back( 4960 C.getDeclAlign(VD), 4961 PrivateHelpersTy( 4962 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4963 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4964 ++I; 4965 ++IElemInitRef; 4966 } 4967 I = Data.LastprivateCopies.begin(); 4968 for (const Expr *E : Data.LastprivateVars) { 4969 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4970 Privates.emplace_back( 4971 C.getDeclAlign(VD), 4972 
PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4973 /*PrivateElemInit=*/nullptr)); 4974 ++I; 4975 } 4976 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 4977 return L.first > R.first; 4978 }); 4979 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 4980 // Build type kmp_routine_entry_t (if not built yet). 4981 emitKmpRoutineEntryT(KmpInt32Ty); 4982 // Build type kmp_task_t (if not built yet). 4983 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 4984 if (SavedKmpTaskloopTQTy.isNull()) { 4985 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4986 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4987 } 4988 KmpTaskTQTy = SavedKmpTaskloopTQTy; 4989 } else { 4990 assert((D.getDirectiveKind() == OMPD_task || 4991 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 4992 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 4993 "Expected taskloop, task or target directive"); 4994 if (SavedKmpTaskTQTy.isNull()) { 4995 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 4996 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 4997 } 4998 KmpTaskTQTy = SavedKmpTaskTQTy; 4999 } 5000 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5001 // Build particular struct kmp_task_t for the given task. 
5002 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5003 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5004 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5005 QualType KmpTaskTWithPrivatesPtrQTy = 5006 C.getPointerType(KmpTaskTWithPrivatesQTy); 5007 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5008 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5009 KmpTaskTWithPrivatesTy->getPointerTo(); 5010 llvm::Value *KmpTaskTWithPrivatesTySize = 5011 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5012 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5013 5014 // Emit initial values for private copies (if any). 5015 llvm::Value *TaskPrivatesMap = nullptr; 5016 llvm::Type *TaskPrivatesMapTy = 5017 std::next(TaskFunction->arg_begin(), 3)->getType(); 5018 if (!Privates.empty()) { 5019 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5020 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5021 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5022 FI->getType(), Privates); 5023 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5024 TaskPrivatesMap, TaskPrivatesMapTy); 5025 } else { 5026 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5027 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5028 } 5029 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5030 // kmp_task_t *tt); 5031 llvm::Function *TaskEntry = emitProxyTaskFunction( 5032 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5033 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5034 TaskPrivatesMap); 5035 5036 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5037 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5038 // kmp_routine_entry_t *task_entry); 5039 // Task flags. 
Format is taken from 5040 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5041 // description of kmp_tasking_flags struct. 5042 enum { 5043 TiedFlag = 0x1, 5044 FinalFlag = 0x2, 5045 DestructorsFlag = 0x8, 5046 PriorityFlag = 0x20, 5047 DetachableFlag = 0x40, 5048 }; 5049 unsigned Flags = Data.Tied ? TiedFlag : 0; 5050 bool NeedsCleanup = false; 5051 if (!Privates.empty()) { 5052 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5053 if (NeedsCleanup) 5054 Flags = Flags | DestructorsFlag; 5055 } 5056 if (Data.Priority.getInt()) 5057 Flags = Flags | PriorityFlag; 5058 if (D.hasClausesOfKind<OMPDetachClause>()) 5059 Flags = Flags | DetachableFlag; 5060 llvm::Value *TaskFlags = 5061 Data.Final.getPointer() 5062 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 5063 CGF.Builder.getInt32(FinalFlag), 5064 CGF.Builder.getInt32(/*C=*/0)) 5065 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5066 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5067 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5068 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 5069 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 5070 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5071 TaskEntry, KmpRoutineEntryPtrTy)}; 5072 llvm::Value *NewTask; 5073 if (D.hasClausesOfKind<OMPNowaitClause>()) { 5074 // Check if we have any device clause associated with the directive. 5075 const Expr *Device = nullptr; 5076 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 5077 Device = C->getDevice(); 5078 // Emit device ID if any otherwise use default value. 
5079 llvm::Value *DeviceID; 5080 if (Device) 5081 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5082 CGF.Int64Ty, /*isSigned=*/true); 5083 else 5084 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 5085 AllocArgs.push_back(DeviceID); 5086 NewTask = CGF.EmitRuntimeCall( 5087 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); 5088 } else { 5089 NewTask = CGF.EmitRuntimeCall( 5090 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5091 } 5092 // Emit detach clause initialization. 5093 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, 5094 // task_descriptor); 5095 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { 5096 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); 5097 LValue EvtLVal = CGF.EmitLValue(Evt); 5098 5099 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 5100 // int gtid, kmp_task_t *task); 5101 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); 5102 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); 5103 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); 5104 llvm::Value *EvtVal = CGF.EmitRuntimeCall( 5105 createRuntimeFunction(OMPRTL__kmpc_task_allow_completion_event), 5106 {Loc, Tid, NewTask}); 5107 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), 5108 Evt->getExprLoc()); 5109 CGF.EmitStoreOfScalar(EvtVal, EvtLVal); 5110 } 5111 llvm::Value *NewTaskNewTaskTTy = 5112 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5113 NewTask, KmpTaskTWithPrivatesPtrTy); 5114 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5115 KmpTaskTWithPrivatesQTy); 5116 LValue TDBase = 5117 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5118 // Fill the data in the resulting kmp_task_t record. 5119 // Copy shareds if there are any. 
5120 Address KmpTaskSharedsPtr = Address::invalid(); 5121 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5122 KmpTaskSharedsPtr = 5123 Address(CGF.EmitLoadOfScalar( 5124 CGF.EmitLValueForField( 5125 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5126 KmpTaskTShareds)), 5127 Loc), 5128 CGF.getNaturalTypeAlignment(SharedsTy)); 5129 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5130 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5131 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5132 } 5133 // Emit initial values for private copies (if any). 5134 TaskResultTy Result; 5135 if (!Privates.empty()) { 5136 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5137 SharedsTy, SharedsPtrTy, Data, Privates, 5138 /*ForDup=*/false); 5139 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5140 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5141 Result.TaskDupFn = emitTaskDupFunction( 5142 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5143 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5144 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5145 } 5146 } 5147 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5148 enum { Priority = 0, Destructors = 1 }; 5149 // Provide pointer to function with destructors for privates. 
5150 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5151 const RecordDecl *KmpCmplrdataUD = 5152 (*FI)->getType()->getAsUnionType()->getDecl(); 5153 if (NeedsCleanup) { 5154 llvm::Value *DestructorFn = emitDestructorsFunction( 5155 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5156 KmpTaskTWithPrivatesQTy); 5157 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5158 LValue DestructorsLV = CGF.EmitLValueForField( 5159 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5160 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5161 DestructorFn, KmpRoutineEntryPtrTy), 5162 DestructorsLV); 5163 } 5164 // Set priority. 5165 if (Data.Priority.getInt()) { 5166 LValue Data2LV = CGF.EmitLValueForField( 5167 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5168 LValue PriorityLV = CGF.EmitLValueForField( 5169 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5170 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5171 } 5172 Result.NewTask = NewTask; 5173 Result.TaskEntry = TaskEntry; 5174 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5175 Result.TDBase = TDBase; 5176 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5177 return Result; 5178 } 5179 5180 namespace { 5181 /// Dependence kind for RTL. 5182 enum RTLDependenceKindTy { 5183 DepIn = 0x01, 5184 DepInOut = 0x3, 5185 DepMutexInOutSet = 0x4 5186 }; 5187 /// Fields ids in kmp_depend_info record. 5188 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5189 } // namespace 5190 5191 /// Translates internal dependency kind into the runtime kind. 5192 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 5193 RTLDependenceKindTy DepKind; 5194 switch (K) { 5195 case OMPC_DEPEND_in: 5196 DepKind = DepIn; 5197 break; 5198 // Out and InOut dependencies must use the same code. 
5199 case OMPC_DEPEND_out: 5200 case OMPC_DEPEND_inout: 5201 DepKind = DepInOut; 5202 break; 5203 case OMPC_DEPEND_mutexinoutset: 5204 DepKind = DepMutexInOutSet; 5205 break; 5206 case OMPC_DEPEND_source: 5207 case OMPC_DEPEND_sink: 5208 case OMPC_DEPEND_depobj: 5209 case OMPC_DEPEND_unknown: 5210 llvm_unreachable("Unknown task dependence type"); 5211 } 5212 return DepKind; 5213 } 5214 5215 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 5216 static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, 5217 QualType &FlagsTy) { 5218 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5219 if (KmpDependInfoTy.isNull()) { 5220 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5221 KmpDependInfoRD->startDefinition(); 5222 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5223 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5224 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5225 KmpDependInfoRD->completeDefinition(); 5226 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5227 } 5228 } 5229 5230 std::pair<llvm::Value *, LValue> 5231 CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, 5232 SourceLocation Loc) { 5233 ASTContext &C = CGM.getContext(); 5234 QualType FlagsTy; 5235 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5236 RecordDecl *KmpDependInfoRD = 5237 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5238 LValue Base = CGF.EmitLoadOfPointerLValue( 5239 DepobjLVal.getAddress(CGF), 5240 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5241 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5242 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5243 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5244 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 5245 Base.getTBAAInfo()); 5246 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5247 Addr.getPointer(), 5248 
llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5249 LValue NumDepsBase = CGF.MakeAddrLValue( 5250 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 5251 Base.getBaseInfo(), Base.getTBAAInfo()); 5252 // NumDeps = deps[i].base_addr; 5253 LValue BaseAddrLVal = CGF.EmitLValueForField( 5254 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5255 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); 5256 return std::make_pair(NumDeps, Base); 5257 } 5258 5259 namespace { 5260 /// Loop generator for OpenMP iterator expression. 5261 class OMPIteratorGeneratorScope final 5262 : public CodeGenFunction::OMPPrivateScope { 5263 CodeGenFunction &CGF; 5264 const OMPIteratorExpr *E = nullptr; 5265 SmallVector<CodeGenFunction::JumpDest, 4> ContDests; 5266 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; 5267 OMPIteratorGeneratorScope() = delete; 5268 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; 5269 5270 public: 5271 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) 5272 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { 5273 if (!E) 5274 return; 5275 SmallVector<llvm::Value *, 4> Uppers; 5276 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 5277 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); 5278 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); 5279 addPrivate(VD, [&CGF, VD]() { 5280 return CGF.CreateMemTemp(VD->getType(), VD->getName()); 5281 }); 5282 const OMPIteratorHelperData &HelperData = E->getHelper(I); 5283 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { 5284 return CGF.CreateMemTemp(HelperData.CounterVD->getType(), 5285 "counter.addr"); 5286 }); 5287 } 5288 Privatize(); 5289 5290 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 5291 const OMPIteratorHelperData &HelperData = E->getHelper(I); 5292 LValue CLVal = 5293 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), 5294 
HelperData.CounterVD->getType()); 5295 // Counter = 0; 5296 CGF.EmitStoreOfScalar( 5297 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), 5298 CLVal); 5299 CodeGenFunction::JumpDest &ContDest = 5300 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); 5301 CodeGenFunction::JumpDest &ExitDest = 5302 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); 5303 // N = <number-of_iterations>; 5304 llvm::Value *N = Uppers[I]; 5305 // cont: 5306 // if (Counter < N) goto body; else goto exit; 5307 CGF.EmitBlock(ContDest.getBlock()); 5308 auto *CVal = 5309 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); 5310 llvm::Value *Cmp = 5311 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() 5312 ? CGF.Builder.CreateICmpSLT(CVal, N) 5313 : CGF.Builder.CreateICmpULT(CVal, N); 5314 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); 5315 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); 5316 // body: 5317 CGF.EmitBlock(BodyBB); 5318 // Iteri = Begini + Counter * Stepi; 5319 CGF.EmitIgnoredExpr(HelperData.Update); 5320 } 5321 } 5322 ~OMPIteratorGeneratorScope() { 5323 if (!E) 5324 return; 5325 for (unsigned I = E->numOfIterators(); I > 0; --I) { 5326 // Counter = Counter + 1; 5327 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); 5328 CGF.EmitIgnoredExpr(HelperData.CounterUpdate); 5329 // goto cont; 5330 CGF.EmitBranchThroughCleanup(ContDests[I - 1]); 5331 // exit: 5332 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); 5333 } 5334 } 5335 }; 5336 } // namespace 5337 5338 static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 5339 llvm::PointerUnion<unsigned *, LValue *> Pos, 5340 const OMPTaskDataTy::DependData &Data, 5341 Address DependenciesArray) { 5342 CodeGenModule &CGM = CGF.CGM; 5343 ASTContext &C = CGM.getContext(); 5344 QualType FlagsTy; 5345 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5346 RecordDecl *KmpDependInfoRD = 5347 
cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5348 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5349 5350 OMPIteratorGeneratorScope IteratorScope( 5351 CGF, cast_or_null<OMPIteratorExpr>( 5352 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 5353 : nullptr)); 5354 for (const Expr *E : Data.DepExprs) { 5355 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); 5356 llvm::Value *Addr; 5357 if (OASE) { 5358 const Expr *Base = OASE->getBase(); 5359 Addr = CGF.EmitScalarExpr(Base); 5360 } else { 5361 Addr = CGF.EmitLValue(E).getPointer(CGF); 5362 } 5363 llvm::Value *Size; 5364 QualType Ty = E->getType(); 5365 if (OASE) { 5366 Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); 5367 for (const Expr *SE : OASE->getDimensions()) { 5368 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 5369 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 5370 CGF.getContext().getSizeType(), 5371 SE->getExprLoc()); 5372 Size = CGF.Builder.CreateNUWMul(Size, Sz); 5373 } 5374 } else if (const auto *ASE = 5375 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5376 LValue UpAddrLVal = 5377 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 5378 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 5379 UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 5380 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy); 5381 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5382 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5383 } else { 5384 Size = CGF.getTypeSize(Ty); 5385 } 5386 LValue Base; 5387 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 5388 Base = CGF.MakeAddrLValue( 5389 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); 5390 } else { 5391 LValue &PosLVal = *Pos.get<LValue *>(); 5392 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 5393 Base = CGF.MakeAddrLValue( 5394 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), 5395 
DependenciesArray.getAlignment()), 5396 KmpDependInfoTy); 5397 } 5398 // deps[i].base_addr = &<Dependencies[i].second>; 5399 LValue BaseAddrLVal = CGF.EmitLValueForField( 5400 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5401 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 5402 BaseAddrLVal); 5403 // deps[i].len = sizeof(<Dependencies[i].second>); 5404 LValue LenLVal = CGF.EmitLValueForField( 5405 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5406 CGF.EmitStoreOfScalar(Size, LenLVal); 5407 // deps[i].flags = <Dependencies[i].first>; 5408 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 5409 LValue FlagsLVal = CGF.EmitLValueForField( 5410 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5411 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5412 FlagsLVal); 5413 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 5414 ++(*P); 5415 } else { 5416 LValue &PosLVal = *Pos.get<LValue *>(); 5417 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 5418 Idx = CGF.Builder.CreateNUWAdd(Idx, 5419 llvm::ConstantInt::get(Idx->getType(), 1)); 5420 CGF.EmitStoreOfScalar(Idx, PosLVal); 5421 } 5422 } 5423 } 5424 5425 static SmallVector<llvm::Value *, 4> 5426 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 5427 const OMPTaskDataTy::DependData &Data) { 5428 assert(Data.DepKind == OMPC_DEPEND_depobj && 5429 "Expected depobj dependecy kind."); 5430 SmallVector<llvm::Value *, 4> Sizes; 5431 SmallVector<LValue, 4> SizeLVals; 5432 ASTContext &C = CGF.getContext(); 5433 QualType FlagsTy; 5434 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5435 RecordDecl *KmpDependInfoRD = 5436 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5437 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5438 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 5439 { 5440 OMPIteratorGeneratorScope IteratorScope( 5441 CGF, 
cast_or_null<OMPIteratorExpr>( 5442 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 5443 : nullptr)); 5444 for (const Expr *E : Data.DepExprs) { 5445 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 5446 LValue Base = CGF.EmitLoadOfPointerLValue( 5447 DepobjLVal.getAddress(CGF), 5448 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5449 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5450 Base.getAddress(CGF), KmpDependInfoPtrT); 5451 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 5452 Base.getTBAAInfo()); 5453 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5454 Addr.getPointer(), 5455 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5456 LValue NumDepsBase = CGF.MakeAddrLValue( 5457 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 5458 Base.getBaseInfo(), Base.getTBAAInfo()); 5459 // NumDeps = deps[i].base_addr; 5460 LValue BaseAddrLVal = CGF.EmitLValueForField( 5461 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5462 llvm::Value *NumDeps = 5463 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 5464 LValue NumLVal = CGF.MakeAddrLValue( 5465 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), 5466 C.getUIntPtrType()); 5467 CGF.InitTempAlloca(NumLVal.getAddress(CGF), 5468 llvm::ConstantInt::get(CGF.IntPtrTy, 0)); 5469 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); 5470 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); 5471 CGF.EmitStoreOfScalar(Add, NumLVal); 5472 SizeLVals.push_back(NumLVal); 5473 } 5474 } 5475 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { 5476 llvm::Value *Size = 5477 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); 5478 Sizes.push_back(Size); 5479 } 5480 return Sizes; 5481 } 5482 5483 static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 5484 LValue PosLVal, 5485 const OMPTaskDataTy::DependData &Data, 5486 Address DependenciesArray) 
{ 5487 assert(Data.DepKind == OMPC_DEPEND_depobj && 5488 "Expected depobj dependecy kind."); 5489 ASTContext &C = CGF.getContext(); 5490 QualType FlagsTy; 5491 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5492 RecordDecl *KmpDependInfoRD = 5493 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5494 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5495 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 5496 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); 5497 { 5498 OMPIteratorGeneratorScope IteratorScope( 5499 CGF, cast_or_null<OMPIteratorExpr>( 5500 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() 5501 : nullptr)); 5502 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { 5503 const Expr *E = Data.DepExprs[I]; 5504 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); 5505 LValue Base = CGF.EmitLoadOfPointerLValue( 5506 DepobjLVal.getAddress(CGF), 5507 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5508 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5509 Base.getAddress(CGF), KmpDependInfoPtrT); 5510 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), 5511 Base.getTBAAInfo()); 5512 5513 // Get number of elements in a single depobj. 5514 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5515 Addr.getPointer(), 5516 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5517 LValue NumDepsBase = CGF.MakeAddrLValue( 5518 Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, 5519 Base.getBaseInfo(), Base.getTBAAInfo()); 5520 // NumDeps = deps[i].base_addr; 5521 LValue BaseAddrLVal = CGF.EmitLValueForField( 5522 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5523 llvm::Value *NumDeps = 5524 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); 5525 5526 // memcopy dependency data. 
5527 llvm::Value *Size = CGF.Builder.CreateNUWMul( 5528 ElSize, 5529 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); 5530 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 5531 Address DepAddr = 5532 Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), 5533 DependenciesArray.getAlignment()); 5534 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); 5535 5536 // Increase pos. 5537 // pos += size; 5538 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); 5539 CGF.EmitStoreOfScalar(Add, PosLVal); 5540 } 5541 } 5542 } 5543 5544 std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( 5545 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, 5546 SourceLocation Loc) { 5547 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { 5548 return D.DepExprs.empty(); 5549 })) 5550 return std::make_pair(nullptr, Address::invalid()); 5551 // Process list of dependencies. 5552 ASTContext &C = CGM.getContext(); 5553 Address DependenciesArray = Address::invalid(); 5554 llvm::Value *NumOfElements = nullptr; 5555 unsigned NumDependencies = std::accumulate( 5556 Dependencies.begin(), Dependencies.end(), 0, 5557 [](unsigned V, const OMPTaskDataTy::DependData &D) { 5558 return D.DepKind == OMPC_DEPEND_depobj 5559 ? V 5560 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); 5561 }); 5562 QualType FlagsTy; 5563 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5564 bool HasDepobjDeps = false; 5565 bool HasRegularWithIterators = false; 5566 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); 5567 llvm::Value *NumOfRegularWithIterators = 5568 llvm::ConstantInt::get(CGF.IntPtrTy, 1); 5569 // Calculate number of depobj dependecies and regular deps with the iterators. 
5570 for (const OMPTaskDataTy::DependData &D : Dependencies) { 5571 if (D.DepKind == OMPC_DEPEND_depobj) { 5572 SmallVector<llvm::Value *, 4> Sizes = 5573 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); 5574 for (llvm::Value *Size : Sizes) { 5575 NumOfDepobjElements = 5576 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); 5577 } 5578 HasDepobjDeps = true; 5579 continue; 5580 } 5581 // Include number of iterations, if any. 5582 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { 5583 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5584 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5585 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); 5586 NumOfRegularWithIterators = 5587 CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); 5588 } 5589 HasRegularWithIterators = true; 5590 continue; 5591 } 5592 } 5593 5594 QualType KmpDependInfoArrayTy; 5595 if (HasDepobjDeps || HasRegularWithIterators) { 5596 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, 5597 /*isSigned=*/false); 5598 if (HasDepobjDeps) { 5599 NumOfElements = 5600 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); 5601 } 5602 if (HasRegularWithIterators) { 5603 NumOfElements = 5604 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); 5605 } 5606 OpaqueValueExpr OVE(Loc, 5607 C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), 5608 VK_RValue); 5609 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, 5610 RValue::get(NumOfElements)); 5611 KmpDependInfoArrayTy = 5612 C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, 5613 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); 5614 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); 5615 // Properly emit variable-sized array. 
5616 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, 5617 ImplicitParamDecl::Other); 5618 CGF.EmitVarDecl(*PD); 5619 DependenciesArray = CGF.GetAddrOfLocalVar(PD); 5620 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, 5621 /*isSigned=*/false); 5622 } else { 5623 KmpDependInfoArrayTy = C.getConstantArrayType( 5624 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, 5625 ArrayType::Normal, /*IndexTypeQuals=*/0); 5626 DependenciesArray = 5627 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5628 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); 5629 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, 5630 /*isSigned=*/false); 5631 } 5632 unsigned Pos = 0; 5633 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 5634 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 5635 Dependencies[I].IteratorExpr) 5636 continue; 5637 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], 5638 DependenciesArray); 5639 } 5640 // Copy regular dependecies with iterators. 5641 LValue PosLVal = CGF.MakeAddrLValue( 5642 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); 5643 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); 5644 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 5645 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || 5646 !Dependencies[I].IteratorExpr) 5647 continue; 5648 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], 5649 DependenciesArray); 5650 } 5651 // Copy final depobj arrays without iterators. 
5652 if (HasDepobjDeps) { 5653 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { 5654 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) 5655 continue; 5656 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], 5657 DependenciesArray); 5658 } 5659 } 5660 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5661 DependenciesArray, CGF.VoidPtrTy); 5662 return std::make_pair(NumOfElements, DependenciesArray); 5663 } 5664 5665 Address CGOpenMPRuntime::emitDepobjDependClause( 5666 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, 5667 SourceLocation Loc) { 5668 if (Dependencies.DepExprs.empty()) 5669 return Address::invalid(); 5670 // Process list of dependencies. 5671 ASTContext &C = CGM.getContext(); 5672 Address DependenciesArray = Address::invalid(); 5673 unsigned NumDependencies = Dependencies.DepExprs.size(); 5674 QualType FlagsTy; 5675 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5676 RecordDecl *KmpDependInfoRD = 5677 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5678 5679 llvm::Value *Size; 5680 // Define type kmp_depend_info[<Dependencies.size()>]; 5681 // For depobj reserve one extra element to store the number of elements. 5682 // It is required to handle depobj(x) update(in) construct. 
5683 // kmp_depend_info[<Dependencies.size()>] deps; 5684 llvm::Value *NumDepsVal; 5685 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); 5686 if (const auto *IE = 5687 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { 5688 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); 5689 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { 5690 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); 5691 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); 5692 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); 5693 } 5694 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), 5695 NumDepsVal); 5696 CharUnits SizeInBytes = 5697 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); 5698 llvm::Value *RecSize = CGM.getSize(SizeInBytes); 5699 Size = CGF.Builder.CreateNUWMul(Size, RecSize); 5700 NumDepsVal = 5701 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); 5702 } else { 5703 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5704 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), 5705 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5706 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); 5707 Size = CGM.getSize(Sz.alignTo(Align)); 5708 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); 5709 } 5710 // Need to allocate on the dynamic memory. 5711 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5712 // Use default allocator. 5713 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5714 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 5715 5716 llvm::Value *Addr = CGF.EmitRuntimeCall( 5717 createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr"); 5718 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5719 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); 5720 DependenciesArray = Address(Addr, Align); 5721 // Write number of elements in the first element of array for depobj. 
5722 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); 5723 // deps[i].base_addr = NumDependencies; 5724 LValue BaseAddrLVal = CGF.EmitLValueForField( 5725 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5726 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); 5727 llvm::PointerUnion<unsigned *, LValue *> Pos; 5728 unsigned Idx = 1; 5729 LValue PosLVal; 5730 if (Dependencies.IteratorExpr) { 5731 PosLVal = CGF.MakeAddrLValue( 5732 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), 5733 C.getSizeType()); 5734 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, 5735 /*IsInit=*/true); 5736 Pos = &PosLVal; 5737 } else { 5738 Pos = &Idx; 5739 } 5740 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); 5741 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5742 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); 5743 return DependenciesArray; 5744 } 5745 5746 void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, 5747 SourceLocation Loc) { 5748 ASTContext &C = CGM.getContext(); 5749 QualType FlagsTy; 5750 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5751 LValue Base = CGF.EmitLoadOfPointerLValue( 5752 DepobjLVal.getAddress(CGF), 5753 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 5754 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5755 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5756 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); 5757 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( 5758 Addr.getPointer(), 5759 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); 5760 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, 5761 CGF.VoidPtrTy); 5762 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5763 // Use default allocator. 
5764 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5765 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; 5766 5767 // _kmpc_free(gtid, addr, nullptr); 5768 (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args); 5769 } 5770 5771 void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, 5772 OpenMPDependClauseKind NewDepKind, 5773 SourceLocation Loc) { 5774 ASTContext &C = CGM.getContext(); 5775 QualType FlagsTy; 5776 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5777 RecordDecl *KmpDependInfoRD = 5778 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5779 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5780 llvm::Value *NumDeps; 5781 LValue Base; 5782 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); 5783 5784 Address Begin = Base.getAddress(CGF); 5785 // Cast from pointer to array type to pointer to single element. 5786 llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); 5787 // The basic structure here is a while-do loop. 5788 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); 5789 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); 5790 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5791 CGF.EmitBlock(BodyBB); 5792 llvm::PHINode *ElementPHI = 5793 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); 5794 ElementPHI->addIncoming(Begin.getPointer(), EntryBB); 5795 Begin = Address(ElementPHI, Begin.getAlignment()); 5796 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), 5797 Base.getTBAAInfo()); 5798 // deps[i].flags = NewDepKind; 5799 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); 5800 LValue FlagsLVal = CGF.EmitLValueForField( 5801 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5802 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5803 FlagsLVal); 5804 5805 // Shift the address forward by one element. 
5806 Address ElementNext = 5807 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); 5808 ElementPHI->addIncoming(ElementNext.getPointer(), 5809 CGF.Builder.GetInsertBlock()); 5810 llvm::Value *IsEmpty = 5811 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); 5812 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5813 // Done. 5814 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5815 } 5816 5817 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5818 const OMPExecutableDirective &D, 5819 llvm::Function *TaskFunction, 5820 QualType SharedsTy, Address Shareds, 5821 const Expr *IfCond, 5822 const OMPTaskDataTy &Data) { 5823 if (!CGF.HaveInsertPoint()) 5824 return; 5825 5826 TaskResultTy Result = 5827 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5828 llvm::Value *NewTask = Result.NewTask; 5829 llvm::Function *TaskEntry = Result.TaskEntry; 5830 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5831 LValue TDBase = Result.TDBase; 5832 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5833 // Process list of dependences. 5834 Address DependenciesArray = Address::invalid(); 5835 llvm::Value *NumOfElements; 5836 std::tie(NumOfElements, DependenciesArray) = 5837 emitDependClause(CGF, Data.Dependences, Loc); 5838 5839 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5840 // libcall. 
5841 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5842 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5843 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5844 // list is not empty 5845 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5846 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5847 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5848 llvm::Value *DepTaskArgs[7]; 5849 if (!Data.Dependences.empty()) { 5850 DepTaskArgs[0] = UpLoc; 5851 DepTaskArgs[1] = ThreadID; 5852 DepTaskArgs[2] = NewTask; 5853 DepTaskArgs[3] = NumOfElements; 5854 DepTaskArgs[4] = DependenciesArray.getPointer(); 5855 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5856 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5857 } 5858 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5859 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5860 if (!Data.Tied) { 5861 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5862 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5863 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5864 } 5865 if (!Data.Dependences.empty()) { 5866 CGF.EmitRuntimeCall( 5867 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5868 } else { 5869 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5870 TaskArgs); 5871 } 5872 // Check if parent region is untied and build return for untied task; 5873 if (auto *Region = 5874 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5875 Region->emitUntiedSwitch(CGF); 5876 }; 5877 5878 llvm::Value *DepWaitTaskArgs[6]; 5879 if (!Data.Dependences.empty()) { 5880 DepWaitTaskArgs[0] = UpLoc; 5881 DepWaitTaskArgs[1] = ThreadID; 5882 DepWaitTaskArgs[2] = NumOfElements; 5883 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5884 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5885 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5886 } 5887 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5888 &Data, &DepWaitTaskArgs, 5889 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5890 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5891 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5892 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5893 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5894 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5895 // is specified. 5896 if (!Data.Dependences.empty()) 5897 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5898 DepWaitTaskArgs); 5899 // Call proxy_task_entry(gtid, new_task); 5900 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5901 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5902 Action.Enter(CGF); 5903 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5904 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5905 OutlinedFnArgs); 5906 }; 5907 5908 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5909 // kmp_task_t *new_task); 5910 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5911 // kmp_task_t *new_task); 5912 RegionCodeGenTy RCG(CodeGen); 5913 CommonActionTy Action( 5914 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5915 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5916 RCG.setAction(Action); 5917 RCG(CGF); 5918 }; 5919 5920 if (IfCond) { 5921 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5922 } else { 5923 RegionCodeGenTy ThenRCG(ThenCodeGen); 5924 ThenRCG(CGF); 5925 } 5926 } 5927 5928 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5929 const OMPLoopDirective &D, 5930 llvm::Function *TaskFunction, 5931 QualType SharedsTy, Address Shareds, 5932 const Expr *IfCond, 5933 const OMPTaskDataTy &Data) { 5934 if 
(!CGF.HaveInsertPoint()) 5935 return; 5936 TaskResultTy Result = 5937 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5938 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5939 // libcall. 5940 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5941 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5942 // sched, kmp_uint64 grainsize, void *task_dup); 5943 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5944 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5945 llvm::Value *IfVal; 5946 if (IfCond) { 5947 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5948 /*isSigned=*/true); 5949 } else { 5950 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5951 } 5952 5953 LValue LBLVal = CGF.EmitLValueForField( 5954 Result.TDBase, 5955 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5956 const auto *LBVar = 5957 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5958 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5959 LBLVal.getQuals(), 5960 /*IsInitializer=*/true); 5961 LValue UBLVal = CGF.EmitLValueForField( 5962 Result.TDBase, 5963 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5964 const auto *UBVar = 5965 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5966 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5967 UBLVal.getQuals(), 5968 /*IsInitializer=*/true); 5969 LValue StLVal = CGF.EmitLValueForField( 5970 Result.TDBase, 5971 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5972 const auto *StVar = 5973 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5974 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5975 StLVal.getQuals(), 5976 /*IsInitializer=*/true); 5977 // Store reductions address. 
5978 LValue RedLVal = CGF.EmitLValueForField( 5979 Result.TDBase, 5980 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5981 if (Data.Reductions) { 5982 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5983 } else { 5984 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5985 CGF.getContext().VoidPtrTy); 5986 } 5987 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5988 llvm::Value *TaskArgs[] = { 5989 UpLoc, 5990 ThreadID, 5991 Result.NewTask, 5992 IfVal, 5993 LBLVal.getPointer(CGF), 5994 UBLVal.getPointer(CGF), 5995 CGF.EmitLoadOfScalar(StLVal, Loc), 5996 llvm::ConstantInt::getSigned( 5997 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5998 llvm::ConstantInt::getSigned( 5999 CGF.IntTy, Data.Schedule.getPointer() 6000 ? Data.Schedule.getInt() ? NumTasks : Grainsize 6001 : NoSchedule), 6002 Data.Schedule.getPointer() 6003 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 6004 /*isSigned=*/false) 6005 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 6006 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6007 Result.TaskDupFn, CGF.VoidPtrTy) 6008 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 6009 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 6010 } 6011 6012 /// Emit reduction operation for each element of array (required for 6013 /// array sections) LHS op = RHS. 6014 /// \param Type Type of array. 6015 /// \param LHSVar Variable on the left side of the reduction operation 6016 /// (references element of array in original variable). 6017 /// \param RHSVar Variable on the right side of the reduction operation 6018 /// (references element of array in original variable). 6019 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 6020 /// RHSVar. 
6021 static void EmitOMPAggregateReduction( 6022 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 6023 const VarDecl *RHSVar, 6024 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 6025 const Expr *, const Expr *)> &RedOpGen, 6026 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 6027 const Expr *UpExpr = nullptr) { 6028 // Perform element-by-element initialization. 6029 QualType ElementTy; 6030 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 6031 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 6032 6033 // Drill down to the base element type on both arrays. 6034 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 6035 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 6036 6037 llvm::Value *RHSBegin = RHSAddr.getPointer(); 6038 llvm::Value *LHSBegin = LHSAddr.getPointer(); 6039 // Cast from pointer to array type to pointer to single element. 6040 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 6041 // The basic structure here is a while-do loop. 6042 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 6043 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 6044 llvm::Value *IsEmpty = 6045 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 6046 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 6047 6048 // Enter the loop body, making that address the current address. 
6049 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 6050 CGF.EmitBlock(BodyBB); 6051 6052 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 6053 6054 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 6055 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 6056 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 6057 Address RHSElementCurrent = 6058 Address(RHSElementPHI, 6059 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 6060 6061 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 6062 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 6063 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 6064 Address LHSElementCurrent = 6065 Address(LHSElementPHI, 6066 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 6067 6068 // Emit copy. 6069 CodeGenFunction::OMPPrivateScope Scope(CGF); 6070 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 6071 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 6072 Scope.Privatize(); 6073 RedOpGen(CGF, XExpr, EExpr, UpExpr); 6074 Scope.ForceCleanup(); 6075 6076 // Shift the address forward by one element. 6077 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 6078 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 6079 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 6080 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 6081 // Check whether we've reached the end. 6082 llvm::Value *Done = 6083 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 6084 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 6085 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 6086 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 6087 6088 // Done. 6089 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 6090 } 6091 6092 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 6093 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 6094 /// UDR combiner function. 6095 static void emitReductionCombiner(CodeGenFunction &CGF, 6096 const Expr *ReductionOp) { 6097 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 6098 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 6099 if (const auto *DRE = 6100 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 6101 if (const auto *DRD = 6102 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 6103 std::pair<llvm::Function *, llvm::Function *> Reduction = 6104 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 6105 RValue Func = RValue::get(Reduction.first); 6106 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 6107 CGF.EmitIgnoredExpr(ReductionOp); 6108 return; 6109 } 6110 CGF.EmitIgnoredExpr(ReductionOp); 6111 } 6112 6113 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 6114 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 6115 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 6116 ArrayRef<const Expr *> ReductionOps) { 6117 ASTContext &C = CGM.getContext(); 6118 6119 // void reduction_func(void *LHSArg, void *RHSArg); 6120 FunctionArgList Args; 6121 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6122 ImplicitParamDecl::Other); 6123 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6124 ImplicitParamDecl::Other); 6125 Args.push_back(&LHSArg); 6126 Args.push_back(&RHSArg); 6127 const auto &CGFI = 6128 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6129 std::string Name = getName({"omp", "reduction", "reduction_func"}); 6130 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 6131 llvm::GlobalValue::InternalLinkage, Name, 6132 &CGM.getModule()); 6133 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 6134 Fn->setDoesNotRecurse(); 
6135 CodeGenFunction CGF(CGM); 6136 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 6137 6138 // Dst = (void*[n])(LHSArg); 6139 // Src = (void*[n])(RHSArg); 6140 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6141 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 6142 ArgsType), CGF.getPointerAlign()); 6143 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6144 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 6145 ArgsType), CGF.getPointerAlign()); 6146 6147 // ... 6148 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 6149 // ... 6150 CodeGenFunction::OMPPrivateScope Scope(CGF); 6151 auto IPriv = Privates.begin(); 6152 unsigned Idx = 0; 6153 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 6154 const auto *RHSVar = 6155 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 6156 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 6157 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 6158 }); 6159 const auto *LHSVar = 6160 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 6161 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 6162 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 6163 }); 6164 QualType PrivTy = (*IPriv)->getType(); 6165 if (PrivTy->isVariablyModifiedType()) { 6166 // Get array size and emit VLA type. 
// The next slot of the LHS list holds the array size encoded as a pointer;
// it is converted back to an integer and bound to the VLA size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner for one reduction item.
///
/// When \p PrivateRef has array type the combiner \p ReductionOp is emitted
/// through EmitOMPAggregateReduction over the variables referenced by \p LHS
/// and \p RHS; otherwise it is emitted once, directly.
///
/// \param CGF Function in which the code is emitted.
/// \param ReductionOp Combiner expression for this item.
/// \param PrivateRef Reference to the private copy; only its type is
/// inspected here.
/// \param LHS Reference to the left-hand-side helper variable.
/// \param RHS Reference to the right-hand-side helper variable.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code that combines private reduction copies into the original
/// items.
///
/// With Options.SimpleReduction set, only the combiners are emitted inline.
/// Otherwise a list of reduction items is built, a reduce function is
/// generated, __kmpc_reduce{_nowait} is called and a switch over its result
/// selects between the plain combiners (case 1) and atomic/critical combiners
/// (case 2). Does nothing when \p CGF has no insertion point.
///
/// \param Privates, LHSExprs, RHSExprs, ReductionOps Parallel lists, walked
/// in lock step, describing each reduction item.
/// \param Options Carries the WithNowait and SimpleReduction flags.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list gets one extra slot per variably modified item (see below), so
  // Size may exceed the number of reduction items.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it runs ahead of I whenever a size slot has
  // been inserted.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // Note: this local Size (the element count as an llvm::Value) shadows
      // the slot counter declared above.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The count is stored in the void* slot as an integer-to-pointer cast.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // <n> is the number of reduction items (RHSExprs.size()), whereas
  // sizeof(RedList) also covers any array-size slots added in step 1.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No enter callee is supplied (nullptr); only the matching end_reduce
  // runtime function and its arguments are attached as the exit action.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr/UpExpr are the two sides of a top-level assignment 'x = upd';
      // EExpr and BO describe the binary operation found inside 'upd'.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: the pointer BO declared in the condition shadows the opcode BO
      // above for the duration of this if statement only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback re-evaluates UpExpr with VD privatized to a
          // temporary that holds the value passed in as XRValue.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end-reduce action is attached to the atomic case.
    AtomicRCG(CGF);
  }

  // Both cases fall through to the default block, which continues the
  // enclosing code.
  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
///
/// The declaration is taken from getBaseDecl(Ref), falling back to the
/// declaration referenced by \p Ref itself. Local variables and parameters
/// contribute their plain name, everything else its mangled name.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
///
/// \param RCG Reduction codegen helper describing all reduction items.
/// \param N Index of the reduction item the function is generated for.
/// \return The generated internal-linkage function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: pass a null shared address.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
///
/// \param N Index of the reduction item the function is generated for.
/// \param LHS, RHS DeclRefExprs naming the helper variables that are remapped
/// onto the first and second function argument respectively.
/// \param PrivateRef Reference to the private copy, forwarded to
/// emitSingleReductionCombiner.
/// \return The generated internal-linkage function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
///
/// \param N Index of the reduction item the function is generated for.
/// \return The generated internal-linkage function, or nullptr when
/// RCG.needCleanups(N) is false.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Fills an array of kmp_task_red_input_t descriptors, one per entry of
/// Data.ReductionVars, and emits the call to __kmpc_task_reduction_init.
///
/// \param LHSExprs, RHSExprs Helper-variable references, indexed like
/// Data.ReductionVars, used when generating the combiner functions.
/// \return The result of the runtime call, or nullptr when \p CGF has no
/// insertion point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size of the item in chars, as size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; (null when no finalizer is needed)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

/// Stores the values that the generated init/comb/fini functions read back
/// from artificial threadprivate variables for reduction item \p N: the
/// dynamic size (when there is one) and the address of the original item
/// (when a custom reduction initializer is used).
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Emits a call to __kmpc_task_reduction_get_th_data for the shared item
/// \p SharedLVal and wraps the returned pointer in an Address that carries
/// the alignment of \p SharedLVal.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emits a taskwait, through the OpenMPIRBuilder when one is available and
/// through a direct __kmpc_omp_taskwait call otherwise, then emits the untied
/// switch of the enclosing OpenMP region, if any.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body produced by \p CodeGen in place, inside an
/// InlinedOpenMPRegionRAII for directive kind \p InnerKind.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values passed as the cncl_kind argument of __kmpc_cancel and
/// __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind named in a cancel/cancellation point construct to
/// the runtime cancellation kind. Anything other than parallel, for and
/// sections is asserted to be taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a __kmpc_cancellationpoint call and a conditional branch to the
/// cancel destination of the enclosing OpenMP region. Nothing is emitted
/// outside an OpenMP region, or when the region has no cancel and
/// \p CancelRegion is not taskgroup.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below: a non-null value branches to the
      // cancel exit block.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a __kmpc_cancel call and a conditional branch to the cancel
/// destination of the enclosing OpenMP region. When \p IfCond is given the
/// whole sequence is guarded by it, with an empty else branch. Nothing is
/// emitted outside an OpenMP region.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below: a non-null value branches to the
      // cancel exit block.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// Records that a target region has been emitted and forwards to
/// emitTargetOutlinedFunctionHelper. \p ParentName must not be empty.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Generates the outlined function for the target region of \p D and, when
/// \p IsOffloadEntry is set, creates its region ID and registers the entry.
///
/// \param OutlinedFn [out] The generated outlined function.
/// \param OutlinedFnID [out] The region ID; only written when
/// \p IsOffloadEntry is true.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also give
  // the outlined function weak linkage (and clear dso_local) in case we are
  // emitting code for the device, because these functions will be entry points
  // to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: the ID is a dedicated constant i8 global, not the function.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls. In either case the expression must also have no (possible) side
/// effects to be considered trivial.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Looks through containers and nested compound statements of \p Body for
/// the one child statement that cannot be ignored.
///
/// Trivial expressions, a fixed set of statement kinds and declaration
/// statements made only of ignorable declarations are skipped.
///
/// \return The single remaining child, or nullptr when a compound statement
/// has more than one such child or none at all.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Variables are ignorable when constexpr, or when of trivial or
              // reference type with no initializer or a trivial one.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
7164 static llvm::Value * 7165 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 7166 const OMPExecutableDirective &D) { 7167 assert(!CGF.getLangOpts().OpenMPIsDevice && 7168 "Clauses associated with the teams directive expected to be emitted " 7169 "only for the host!"); 7170 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7171 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7172 "Expected target-based executable directive."); 7173 CGBuilderTy &Bld = CGF.Builder; 7174 switch (DirectiveKind) { 7175 case OMPD_target: { 7176 const auto *CS = D.getInnermostCapturedStmt(); 7177 const auto *Body = 7178 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 7179 const Stmt *ChildStmt = 7180 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 7181 if (const auto *NestedDir = 7182 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 7183 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 7184 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 7185 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7186 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7187 const Expr *NumTeams = 7188 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 7189 llvm::Value *NumTeamsVal = 7190 CGF.EmitScalarExpr(NumTeams, 7191 /*IgnoreResultAssign*/ true); 7192 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 7193 /*isSigned=*/true); 7194 } 7195 return Bld.getInt32(0); 7196 } 7197 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 7198 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 7199 return Bld.getInt32(1); 7200 return Bld.getInt32(0); 7201 } 7202 return nullptr; 7203 } 7204 case OMPD_target_teams: 7205 case OMPD_target_teams_distribute: 7206 case OMPD_target_teams_distribute_simd: 7207 case OMPD_target_teams_distribute_parallel_for: 7208 case OMPD_target_teams_distribute_parallel_for_simd: { 7209 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 7210 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 7211 const Expr *NumTeams = 7212 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 7213 llvm::Value *NumTeamsVal = 7214 CGF.EmitScalarExpr(NumTeams, 7215 /*IgnoreResultAssign*/ true); 7216 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 7217 /*isSigned=*/true); 7218 } 7219 return Bld.getInt32(0); 7220 } 7221 case OMPD_target_parallel: 7222 case OMPD_target_parallel_for: 7223 case OMPD_target_parallel_for_simd: 7224 case OMPD_target_simd: 7225 return Bld.getInt32(1); 7226 case OMPD_parallel: 7227 case OMPD_for: 7228 case OMPD_parallel_for: 7229 case OMPD_parallel_master: 7230 case OMPD_parallel_sections: 7231 case OMPD_for_simd: 7232 case OMPD_parallel_for_simd: 7233 case OMPD_cancel: 7234 case OMPD_cancellation_point: 7235 case OMPD_ordered: 7236 case OMPD_threadprivate: 7237 case OMPD_allocate: 7238 case OMPD_task: 7239 case OMPD_simd: 7240 case OMPD_sections: 7241 case OMPD_section: 7242 case OMPD_single: 7243 case OMPD_master: 7244 case OMPD_critical: 7245 case OMPD_taskyield: 7246 case OMPD_barrier: 7247 case OMPD_taskwait: 7248 case OMPD_taskgroup: 7249 case OMPD_atomic: 7250 case OMPD_flush: 7251 case OMPD_depobj: 7252 case OMPD_scan: 7253 case OMPD_teams: 7254 case OMPD_target_data: 7255 case OMPD_target_exit_data: 7256 case OMPD_target_enter_data: 7257 case OMPD_distribute: 7258 case OMPD_distribute_simd: 7259 case OMPD_distribute_parallel_for: 7260 case OMPD_distribute_parallel_for_simd: 7261 case OMPD_teams_distribute: 7262 case OMPD_teams_distribute_simd: 7263 case OMPD_teams_distribute_parallel_for: 7264 case OMPD_teams_distribute_parallel_for_simd: 7265 case OMPD_target_update: 7266 case OMPD_declare_simd: 7267 case OMPD_declare_variant: 7268 case OMPD_begin_declare_variant: 7269 case OMPD_end_declare_variant: 7270 case OMPD_declare_target: 7271 case OMPD_end_declare_target: 7272 case OMPD_declare_reduction: 7273 case OMPD_declare_mapper: 7274 case OMPD_taskloop: 7275 case OMPD_taskloop_simd: 7276 case OMPD_master_taskloop: 
7277 case OMPD_master_taskloop_simd: 7278 case OMPD_parallel_master_taskloop: 7279 case OMPD_parallel_master_taskloop_simd: 7280 case OMPD_requires: 7281 case OMPD_unknown: 7282 break; 7283 } 7284 llvm_unreachable("Unexpected directive kind."); 7285 } 7286 7287 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 7288 llvm::Value *DefaultThreadLimitVal) { 7289 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7290 CGF.getContext(), CS->getCapturedStmt()); 7291 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7292 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 7293 llvm::Value *NumThreads = nullptr; 7294 llvm::Value *CondVal = nullptr; 7295 // Handle if clause. If if clause present, the number of threads is 7296 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 7297 if (Dir->hasClausesOfKind<OMPIfClause>()) { 7298 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7299 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7300 const OMPIfClause *IfClause = nullptr; 7301 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 7302 if (C->getNameModifier() == OMPD_unknown || 7303 C->getNameModifier() == OMPD_parallel) { 7304 IfClause = C; 7305 break; 7306 } 7307 } 7308 if (IfClause) { 7309 const Expr *Cond = IfClause->getCondition(); 7310 bool Result; 7311 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7312 if (!Result) 7313 return CGF.Builder.getInt32(1); 7314 } else { 7315 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 7316 if (const auto *PreInit = 7317 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 7318 for (const auto *I : PreInit->decls()) { 7319 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7320 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7321 } else { 7322 CodeGenFunction::AutoVarEmission Emission = 7323 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7324 CGF.EmitAutoVarCleanups(Emission); 7325 } 7326 } 7327 } 7328 CondVal = 
CGF.EvaluateExprAsBool(Cond); 7329 } 7330 } 7331 } 7332 // Check the value of num_threads clause iff if clause was not specified 7333 // or is not evaluated to false. 7334 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 7335 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7336 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7337 const auto *NumThreadsClause = 7338 Dir->getSingleClause<OMPNumThreadsClause>(); 7339 CodeGenFunction::LexicalScope Scope( 7340 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 7341 if (const auto *PreInit = 7342 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 7343 for (const auto *I : PreInit->decls()) { 7344 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7345 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7346 } else { 7347 CodeGenFunction::AutoVarEmission Emission = 7348 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7349 CGF.EmitAutoVarCleanups(Emission); 7350 } 7351 } 7352 } 7353 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 7354 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 7355 /*isSigned=*/false); 7356 if (DefaultThreadLimitVal) 7357 NumThreads = CGF.Builder.CreateSelect( 7358 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 7359 DefaultThreadLimitVal, NumThreads); 7360 } else { 7361 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 7362 : CGF.Builder.getInt32(0); 7363 } 7364 // Process condition of the if clause. 7365 if (CondVal) { 7366 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 7367 CGF.Builder.getInt32(1)); 7368 } 7369 return NumThreads; 7370 } 7371 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 7372 return CGF.Builder.getInt32(1); 7373 return DefaultThreadLimitVal; 7374 } 7375 return DefaultThreadLimitVal ? DefaultThreadLimitVal 7376 : CGF.Builder.getInt32(0); 7377 } 7378 7379 /// Emit the number of threads for a target directive. 
Inspect the 7380 /// thread_limit clause associated with a teams construct combined or closely 7381 /// nested with the target directive. 7382 /// 7383 /// Emit the num_threads clause for directives such as 'target parallel' that 7384 /// have no associated teams construct. 7385 /// 7386 /// Otherwise, return nullptr. 7387 static llvm::Value * 7388 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 7389 const OMPExecutableDirective &D) { 7390 assert(!CGF.getLangOpts().OpenMPIsDevice && 7391 "Clauses associated with the teams directive expected to be emitted " 7392 "only for the host!"); 7393 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7394 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7395 "Expected target-based executable directive."); 7396 CGBuilderTy &Bld = CGF.Builder; 7397 llvm::Value *ThreadLimitVal = nullptr; 7398 llvm::Value *NumThreadsVal = nullptr; 7399 switch (DirectiveKind) { 7400 case OMPD_target: { 7401 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7402 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7403 return NumThreads; 7404 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7405 CGF.getContext(), CS->getCapturedStmt()); 7406 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7407 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7408 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7409 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7410 const auto *ThreadLimitClause = 7411 Dir->getSingleClause<OMPThreadLimitClause>(); 7412 CodeGenFunction::LexicalScope Scope( 7413 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7414 if (const auto *PreInit = 7415 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7416 for (const auto *I : PreInit->decls()) { 7417 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7418 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7419 } else { 7420 CodeGenFunction::AutoVarEmission Emission = 7421 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7422 CGF.EmitAutoVarCleanups(Emission); 7423 } 7424 } 7425 } 7426 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7427 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7428 ThreadLimitVal = 7429 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7430 } 7431 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7432 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7433 CS = Dir->getInnermostCapturedStmt(); 7434 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7435 CGF.getContext(), CS->getCapturedStmt()); 7436 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7437 } 7438 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7439 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7440 CS = Dir->getInnermostCapturedStmt(); 7441 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7442 return NumThreads; 7443 } 7444 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7445 return Bld.getInt32(1); 7446 } 7447 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 7448 } 7449 case OMPD_target_teams: { 7450 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7451 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7452 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7453 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7454 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7455 ThreadLimitVal = 7456 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7457 } 7458 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7459 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7460 return NumThreads; 7461 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7462 CGF.getContext(), CS->getCapturedStmt()); 7463 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7464 if (Dir->getDirectiveKind() == OMPD_distribute) { 7465 CS = Dir->getInnermostCapturedStmt(); 7466 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7467 return NumThreads; 7468 } 7469 } 7470 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7471 } 7472 case OMPD_target_teams_distribute: 7473 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7474 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7475 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7476 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7477 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7478 ThreadLimitVal = 7479 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7480 } 7481 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 7482 case OMPD_target_parallel: 7483 case OMPD_target_parallel_for: 7484 case OMPD_target_parallel_for_simd: 7485 case OMPD_target_teams_distribute_parallel_for: 7486 case OMPD_target_teams_distribute_parallel_for_simd: { 7487 llvm::Value *CondVal = nullptr; 7488 // Handle if clause. 
If if clause present, the number of threads is 7489 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 7490 if (D.hasClausesOfKind<OMPIfClause>()) { 7491 const OMPIfClause *IfClause = nullptr; 7492 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7493 if (C->getNameModifier() == OMPD_unknown || 7494 C->getNameModifier() == OMPD_parallel) { 7495 IfClause = C; 7496 break; 7497 } 7498 } 7499 if (IfClause) { 7500 const Expr *Cond = IfClause->getCondition(); 7501 bool Result; 7502 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7503 if (!Result) 7504 return Bld.getInt32(1); 7505 } else { 7506 CodeGenFunction::RunCleanupsScope Scope(CGF); 7507 CondVal = CGF.EvaluateExprAsBool(Cond); 7508 } 7509 } 7510 } 7511 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7512 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7513 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7514 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7515 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7516 ThreadLimitVal = 7517 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7518 } 7519 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7520 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7521 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7522 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7523 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7524 NumThreadsVal = 7525 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7526 ThreadLimitVal = ThreadLimitVal 7527 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7528 ThreadLimitVal), 7529 NumThreadsVal, ThreadLimitVal) 7530 : NumThreadsVal; 7531 } 7532 if (!ThreadLimitVal) 7533 ThreadLimitVal = Bld.getInt32(0); 7534 if (CondVal) 7535 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7536 return ThreadLimitVal; 7537 } 7538 case OMPD_target_teams_distribute_simd: 7539 case OMPD_target_simd: 7540 return Bld.getInt32(1); 7541 case OMPD_parallel: 7542 case OMPD_for: 7543 case OMPD_parallel_for: 7544 case OMPD_parallel_master: 7545 case OMPD_parallel_sections: 7546 case OMPD_for_simd: 7547 case OMPD_parallel_for_simd: 7548 case OMPD_cancel: 7549 case OMPD_cancellation_point: 7550 case OMPD_ordered: 7551 case OMPD_threadprivate: 7552 case OMPD_allocate: 7553 case OMPD_task: 7554 case OMPD_simd: 7555 case OMPD_sections: 7556 case OMPD_section: 7557 case OMPD_single: 7558 case OMPD_master: 7559 case OMPD_critical: 7560 case OMPD_taskyield: 7561 case OMPD_barrier: 7562 case OMPD_taskwait: 7563 case OMPD_taskgroup: 7564 case OMPD_atomic: 7565 case OMPD_flush: 7566 case OMPD_depobj: 7567 case OMPD_scan: 7568 case OMPD_teams: 7569 case OMPD_target_data: 7570 case OMPD_target_exit_data: 7571 case OMPD_target_enter_data: 7572 case OMPD_distribute: 7573 case OMPD_distribute_simd: 7574 case OMPD_distribute_parallel_for: 7575 case OMPD_distribute_parallel_for_simd: 7576 case OMPD_teams_distribute: 7577 case OMPD_teams_distribute_simd: 7578 case OMPD_teams_distribute_parallel_for: 7579 case OMPD_teams_distribute_parallel_for_simd: 7580 case OMPD_target_update: 7581 case OMPD_declare_simd: 7582 case OMPD_declare_variant: 7583 case OMPD_begin_declare_variant: 7584 case OMPD_end_declare_variant: 7585 case OMPD_declare_target: 7586 case OMPD_end_declare_target: 7587 case OMPD_declare_reduction: 7588 case OMPD_declare_mapper: 7589 case OMPD_taskloop: 7590 case OMPD_taskloop_simd: 7591 case OMPD_master_taskloop: 7592 case OMPD_master_taskloop_simd: 7593 case 
OMPD_parallel_master_taskloop: 7594 case OMPD_parallel_master_taskloop_simd: 7595 case OMPD_requires: 7596 case OMPD_unknown: 7597 break; 7598 } 7599 llvm_unreachable("Unsupported directive kind."); 7600 } 7601 7602 namespace { 7603 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7604 7605 // Utility to handle information from clauses associated with a given 7606 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7607 // It provides a convenient interface to obtain the information and generate 7608 // code for that information. 7609 class MappableExprsHandler { 7610 public: 7611 /// Values for bit flags used to specify the mapping type for 7612 /// offloading. 7613 enum OpenMPOffloadMappingFlags : uint64_t { 7614 /// No flags 7615 OMP_MAP_NONE = 0x0, 7616 /// Allocate memory on the device and move data from host to device. 7617 OMP_MAP_TO = 0x01, 7618 /// Allocate memory on the device and move data from device to host. 7619 OMP_MAP_FROM = 0x02, 7620 /// Always perform the requested mapping action on the element, even 7621 /// if it was already mapped before. 7622 OMP_MAP_ALWAYS = 0x04, 7623 /// Delete the element from the device environment, ignoring the 7624 /// current reference count associated with the element. 7625 OMP_MAP_DELETE = 0x08, 7626 /// The element being mapped is a pointer-pointee pair; both the 7627 /// pointer and the pointee should be mapped. 7628 OMP_MAP_PTR_AND_OBJ = 0x10, 7629 /// This flags signals that the base address of an entry should be 7630 /// passed to the target kernel as an argument. 7631 OMP_MAP_TARGET_PARAM = 0x20, 7632 /// Signal that the runtime library has to return the device pointer 7633 /// in the current position for the data being mapped. Used when we have the 7634 /// use_device_ptr clause. 7635 OMP_MAP_RETURN_PARAM = 0x40, 7636 /// This flag signals that the reference being passed is a pointer to 7637 /// private data. 7638 OMP_MAP_PRIVATE = 0x80, 7639 /// Pass the element to the device by value. 
7640 OMP_MAP_LITERAL = 0x100, 7641 /// Implicit map 7642 OMP_MAP_IMPLICIT = 0x200, 7643 /// Close is a hint to the runtime to allocate memory close to 7644 /// the target device. 7645 OMP_MAP_CLOSE = 0x400, 7646 /// The 16 MSBs of the flags indicate whether the entry is member of some 7647 /// struct/class. 7648 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7649 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7650 }; 7651 7652 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7653 static unsigned getFlagMemberOffset() { 7654 unsigned Offset = 0; 7655 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7656 Remain = Remain >> 1) 7657 Offset++; 7658 return Offset; 7659 } 7660 7661 /// Class that associates information with a base pointer to be passed to the 7662 /// runtime library. 7663 class BasePointerInfo { 7664 /// The base pointer. 7665 llvm::Value *Ptr = nullptr; 7666 /// The base declaration that refers to this device pointer, or null if 7667 /// there is none. 7668 const ValueDecl *DevPtrDecl = nullptr; 7669 7670 public: 7671 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7672 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7673 llvm::Value *operator*() const { return Ptr; } 7674 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7675 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7676 }; 7677 7678 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7679 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7680 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7681 7682 /// Map between a struct and the its lowest & highest elements which have been 7683 /// mapped. 
7684 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7685 /// HE(FieldIndex, Pointer)} 7686 struct StructRangeInfoTy { 7687 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7688 0, Address::invalid()}; 7689 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7690 0, Address::invalid()}; 7691 Address Base = Address::invalid(); 7692 }; 7693 7694 private: 7695 /// Kind that defines how a device pointer has to be returned. 7696 struct MapInfo { 7697 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7698 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7699 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7700 bool ReturnDevicePointer = false; 7701 bool IsImplicit = false; 7702 7703 MapInfo() = default; 7704 MapInfo( 7705 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7706 OpenMPMapClauseKind MapType, 7707 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7708 bool ReturnDevicePointer, bool IsImplicit) 7709 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7710 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7711 }; 7712 7713 /// If use_device_ptr is used on a pointer which is a struct member and there 7714 /// is no map information about it, then emission of that entry is deferred 7715 /// until the whole struct has been processed. 7716 struct DeferredDevicePtrEntryTy { 7717 const Expr *IE = nullptr; 7718 const ValueDecl *VD = nullptr; 7719 7720 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7721 : IE(IE), VD(VD) {} 7722 }; 7723 7724 /// The target directive from where the mappable clauses were extracted. It 7725 /// is either a executable directive or a user-defined mapper directive. 7726 llvm::PointerUnion<const OMPExecutableDirective *, 7727 const OMPDeclareMapperDecl *> 7728 CurDir; 7729 7730 /// Function the directive is being generated for. 
7731 CodeGenFunction &CGF; 7732 7733 /// Set of all first private variables in the current directive. 7734 /// bool data is set to true if the variable is implicitly marked as 7735 /// firstprivate, false otherwise. 7736 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7737 7738 /// Map between device pointer declarations and their expression components. 7739 /// The key value for declarations in 'this' is null. 7740 llvm::DenseMap< 7741 const ValueDecl *, 7742 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7743 DevPointersMap; 7744 7745 llvm::Value *getExprTypeSize(const Expr *E) const { 7746 QualType ExprTy = E->getType().getCanonicalType(); 7747 7748 // Calculate the size for array shaping expression. 7749 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7750 llvm::Value *Size = 7751 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7752 for (const Expr *SE : OAE->getDimensions()) { 7753 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7754 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7755 CGF.getContext().getSizeType(), 7756 SE->getExprLoc()); 7757 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7758 } 7759 return Size; 7760 } 7761 7762 // Reference types are ignored for mapping purposes. 7763 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7764 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7765 7766 // Given that an array section is considered a built-in type, we need to 7767 // do the calculation based on the length of the section instead of relying 7768 // on CGF.getTypeSize(E->getType()). 7769 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7770 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7771 OAE->getBase()->IgnoreParenImpCasts()) 7772 .getCanonicalType(); 7773 7774 // If there is no length associated with the expression and lower bound is 7775 // not specified too, that means we are using the whole length of the 7776 // base. 
7777 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7778 !OAE->getLowerBound()) 7779 return CGF.getTypeSize(BaseTy); 7780 7781 llvm::Value *ElemSize; 7782 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7783 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7784 } else { 7785 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7786 assert(ATy && "Expecting array type if not a pointer type."); 7787 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7788 } 7789 7790 // If we don't have a length at this point, that is because we have an 7791 // array section with a single element. 7792 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7793 return ElemSize; 7794 7795 if (const Expr *LenExpr = OAE->getLength()) { 7796 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7797 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7798 CGF.getContext().getSizeType(), 7799 LenExpr->getExprLoc()); 7800 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7801 } 7802 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7803 OAE->getLowerBound() && "expected array_section[lb:]."); 7804 // Size = sizetype - lb * elemtype; 7805 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7806 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7807 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7808 CGF.getContext().getSizeType(), 7809 OAE->getLowerBound()->getExprLoc()); 7810 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7811 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7812 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7813 LengthVal = CGF.Builder.CreateSelect( 7814 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7815 return LengthVal; 7816 } 7817 return CGF.getTypeSize(ExprTy); 7818 } 7819 7820 /// Return the corresponding bits for a given map clause modifier. 
Add 7821 /// a flag marking the map as a pointer if requested. Add a flag marking the 7822 /// map as the first one of a series of maps that relate to the same map 7823 /// expression. 7824 OpenMPOffloadMappingFlags getMapTypeBits( 7825 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7826 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7827 OpenMPOffloadMappingFlags Bits = 7828 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7829 switch (MapType) { 7830 case OMPC_MAP_alloc: 7831 case OMPC_MAP_release: 7832 // alloc and release is the default behavior in the runtime library, i.e. 7833 // if we don't pass any bits alloc/release that is what the runtime is 7834 // going to do. Therefore, we don't need to signal anything for these two 7835 // type modifiers. 7836 break; 7837 case OMPC_MAP_to: 7838 Bits |= OMP_MAP_TO; 7839 break; 7840 case OMPC_MAP_from: 7841 Bits |= OMP_MAP_FROM; 7842 break; 7843 case OMPC_MAP_tofrom: 7844 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7845 break; 7846 case OMPC_MAP_delete: 7847 Bits |= OMP_MAP_DELETE; 7848 break; 7849 case OMPC_MAP_unknown: 7850 llvm_unreachable("Unexpected map type!"); 7851 } 7852 if (AddPtrFlag) 7853 Bits |= OMP_MAP_PTR_AND_OBJ; 7854 if (AddIsTargetParamFlag) 7855 Bits |= OMP_MAP_TARGET_PARAM; 7856 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7857 != MapModifiers.end()) 7858 Bits |= OMP_MAP_ALWAYS; 7859 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7860 != MapModifiers.end()) 7861 Bits |= OMP_MAP_CLOSE; 7862 return Bits; 7863 } 7864 7865 /// Return true if the provided expression is a final array section. A 7866 /// final array section, is one whose length can't be proved to be one. 7867 bool isFinalArraySectionExpression(const Expr *E) const { 7868 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7869 7870 // It is not an array section and therefore not a unity-size one. 
7871 if (!OASE) 7872 return false; 7873 7874 // An array section with no colon always refer to a single element. 7875 if (OASE->getColonLoc().isInvalid()) 7876 return false; 7877 7878 const Expr *Length = OASE->getLength(); 7879 7880 // If we don't have a length we have to check if the array has size 1 7881 // for this dimension. Also, we should always expect a length if the 7882 // base type is pointer. 7883 if (!Length) { 7884 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7885 OASE->getBase()->IgnoreParenImpCasts()) 7886 .getCanonicalType(); 7887 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7888 return ATy->getSize().getSExtValue() != 1; 7889 // If we don't have a constant dimension length, we have to consider 7890 // the current section as having any size, so it is not necessarily 7891 // unitary. If it happen to be unity size, that's user fault. 7892 return true; 7893 } 7894 7895 // Check if the length evaluates to 1. 7896 Expr::EvalResult Result; 7897 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7898 return true; // Can have more that size 1. 7899 7900 llvm::APSInt ConstLength = Result.Val.getInt(); 7901 return ConstLength.getSExtValue() != 1; 7902 } 7903 7904 /// Generate the base pointers, section pointers, sizes and map type 7905 /// bits for the provided map type, map modifier, and expression components. 7906 /// \a IsFirstComponent should be set to true if the provided set of 7907 /// components is the first associated with a capture. 
void generateInfoForComponentList(
    OpenMPMapClauseKind MapType,
    ArrayRef<OpenMPMapModifierKind> MapModifiers,
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
    StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
    bool IsImplicit,
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedElements = llvm::None) const {
  // Entries are appended in lock-step to BasePointers, Pointers, Sizes and
  // Types. PartialStruct is updated with the lowest/highest mapped field
  // whenever a member expression is part of the component list.
  //
  // The following summarizes what has to be generated for each map and the
  // types below. The generated information is expressed in this order:
  // base pointer, section pointer, size, flags
  // (to add to the ones that come from the map type and modifier).
  //
  // double d;
  // int i[100];
  // float *p;
  //
  // struct S1 {
  //   int i;
  //   float f[50];
  // }
  // struct S2 {
  //   int i;
  //   float f[50];
  //   S1 s;
  //   double *p;
  //   struct S2 *ps;
  // }
  // S2 s;
  // S2 *ps;
  //
  // map(d)
  // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
  //
  // map(i)
  // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(i[1:23])
  // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(p)
  // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
  //
  // map(p[1:24])
  // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s)
  // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
  //
  // map(s.i)
  // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(s.s.f)
  // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(s.p)
  // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
  //
  // map(to: s.p[:22])
  // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
  // &(s.p), &(s.p[0]), 22*sizeof(double),
  //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
  // (*) alloc space for struct members, only this is a target parameter
  // (**) map the pointer (nothing to be mapped in this example) (the compiler
  //      optimizes this entry out, same in the examples below)
  // (***) map the pointee (map: to)
  //
  // map(s.ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: s.ps->s.i)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(to: s.ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(s.ps->ps->ps)
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: s.ps->ps->s.f[:22])
  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(ps)
  // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(ps->i)
  // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
  //
  // map(ps->s.f)
  // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->p)
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
  //
  // map(to: ps->p[:22])
  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
  // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
  //
  // map(ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
  //
  // map(from: ps->ps->s.i)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(from: ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  //
  // map(ps->ps->ps->ps)
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
  //
  // map(to: ps->ps->ps->s.f[:22])
  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
  //
  // map(to: s.f[:22]) map(from: s.p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
  //     sizeof(double*) (*), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
  // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
  // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
  // (*) allocate contiguous space needed to fit all mapped members even if
  //     we allocate space for members not mapped (in this example,
  //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
  //     them as well because they fall between &s.f[0] and &s.p)
  //
  // map(from: s.f[:22]) map(to: ps->p[:33])
  // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 2nd element in the list of
  //     arguments, hence MEMBER_OF(2)
  //
  // map(from: s.f[:22], s.s) map(to: ps->p[:33])
  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
  // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
  // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
  // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
  // (*) the struct this entry pertains to is the 4th element in the list
  //     of arguments, hence MEMBER_OF(4)

  // Track if the map information being generated is the first for a capture.
  bool IsCaptureFirstInfo = IsFirstComponentList;
  // When the variable is on a declare target link or in a to clause with
  // unified memory, a reference is needed to hold the host/device address
  // of the variable.
  bool RequiresReference = false;

  // Scan the components from the base to the complete expression.
  auto CI = Components.rbegin();
  auto CE = Components.rend();
  auto I = CI;

  // Track if the map information being generated is the first for a list of
  // components.
  bool IsExpressionFirstInfo = true;
  Address BP = Address::invalid();
  // Classify the base (first) component; this decides how the initial base
  // pointer BP is computed below.
  const Expr *AssocExpr = I->getAssociatedExpression();
  const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
  const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
  const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

  if (isa<MemberExpr>(AssocExpr)) {
    // The base is the 'this' pointer. The content of the pointer is going
    // to be the base of the field being mapped.
    BP = CGF.LoadCXXThisAddress();
  } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
             (OASE &&
              isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
    // Array subscript or array section applied directly to 'this'.
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
  } else if (OAShE &&
             isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
    // Array shaping applied to 'this': the base is the value of the shaped
    // pointer expression itself.
    BP = Address(
        CGF.EmitScalarExpr(OAShE->getBase()),
        CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
  } else {
    // The base is the reference to the variable.
    // BP = &Var.
    BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    if (const auto *VD =
            dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
      if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
              OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
        if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
          RequiresReference = true;
          BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
        }
      }
    }

    // If the variable is a pointer and is being dereferenced (i.e. is not
    // the last component), the base has to be the pointer itself, not its
    // reference. References are ignored for mapping purposes.
    QualType Ty =
        I->getAssociatedDeclaration()->getType().getNonReferenceType();
    if (Ty->isAnyPointerType() && std::next(I) != CE) {
      BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

      // We do not need to generate individual map information for the
      // pointer, it can be associated with the combined storage.
      ++I;
    }
  }

  // Track whether a component of the list should be marked as MEMBER_OF some
  // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
  // in a component list should be marked as MEMBER_OF, all subsequent entries
  // do not belong to the base struct. E.g.
  // struct S2 s;
  // s.ps->ps->ps->f[:]
  //   (1) (2) (3) (4)
  // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
  // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
  // is the pointee of ps(2) which is not member of struct s, so it should not
  // be marked as such (it is still PTR_AND_OBJ).
  // The variable is initialized to false so that PTR_AND_OBJ entries which
  // are not struct members are not considered (e.g. array of pointers to
  // data).
  bool ShouldBeMemberOf = false;

  // Variable keeping track of whether or not we have encountered a component
  // in the component list which is a member expression. Useful when we have a
  // pointer or a final array section, in which case it is the previous
  // component in the list which tells us whether we have a member expression.
  // E.g. X.f[:]
  // While processing the final array section "[:]" it is "f" which tells us
  // whether we are dealing with a member of a declared struct.
  const MemberExpr *EncounteredME = nullptr;

  for (; I != CE; ++I) {
    // If the current component is member of a struct (parent struct) mark it.
    if (!EncounteredME) {
      EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
      // If we encounter a PTR_AND_OBJ entry from now on it should be marked
      // as MEMBER_OF the parent struct.
      if (EncounteredME)
        ShouldBeMemberOf = true;
    }

    auto Next = std::next(I);

    // We need to generate the addresses and sizes if this is the last
    // component, if the component is a pointer or if it is an array section
    // whose length can't be proved to be one. If this is a pointer, it
    // becomes the base address for the following components.

    // A final array section, is one whose length can't be proved to be one.
    bool IsFinalArraySection =
        isFinalArraySectionExpression(I->getAssociatedExpression());

    // Get information on whether the element is a pointer. Have to do a
    // special treatment for array sections given that they are built-in
    // types.
    // Note: OASE and OAShE below shadow the variables of the same name that
    // were computed for the base component before the loop.
    const auto *OASE =
        dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
    const auto *OAShE =
        dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
    const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
    const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
    bool IsPointer =
        OAShE ||
        (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                     .getCanonicalType()
                     ->isAnyPointerType()) ||
        I->getAssociatedExpression()->getType()->isAnyPointerType();
    // A pointer-typed component that is neither a unary nor a binary
    // operator expression.
    bool IsNonDerefPointer = IsPointer && !UO && !BO;

    if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
      // If this is not the last component, we expect the pointer to be
      // associated with an array expression or member expression.
      assert((Next == CE ||
              isa<MemberExpr>(Next->getAssociatedExpression()) ||
              isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
              isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
              isa<UnaryOperator>(Next->getAssociatedExpression()) ||
              isa<BinaryOperator>(Next->getAssociatedExpression())) &&
             "Unexpected expression");

      // LB is the section pointer (lower bound) of the entry for this
      // component.
      Address LB = Address::invalid();
      if (OAShE) {
        LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                     CGF.getContext().getTypeAlignInChars(
                         OAShE->getBase()->getType()));
      } else {
        LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                 .getAddress(CGF);
      }

      // If this component is a pointer inside the base struct then we don't
      // need to create any entry for it - it will be combined with the object
      // it is pointing to into a single PTR_AND_OBJ entry.
      bool IsMemberPointer =
          IsPointer && EncounteredME &&
          (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
           EncounteredME);
      if (!OverlappedElements.empty()) {
        // Handle base element with the info for overlapped elements.
        assert(!PartialStruct.Base.isValid() && "The base element is set.");
        assert(Next == CE &&
               "Expected last element for the overlapped elements.");
        assert(!IsPointer &&
               "Unexpected base element with the pointer type.");
        // Mark the whole struct as the struct that requires allocation on the
        // device.
        PartialStruct.LowestElem = {0, LB};
        CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
            I->getAssociatedExpression()->getType());
        // HB addresses the last byte of the object (LB viewed as a void
        // pointer, advanced by TypeSize - 1).
        Address HB = CGF.Builder.CreateConstGEP(
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                            CGF.VoidPtrTy),
            TypeSize.getQuantity() - 1);
        PartialStruct.HighestElem = {
            std::numeric_limits<decltype(
                PartialStruct.HighestElem.first)>::max(),
            HB};
        PartialStruct.Base = BP;
        // Emit data for non-overlapped data.
        OpenMPOffloadMappingFlags Flags =
            OMP_MAP_MEMBER_OF |
            getMapTypeBits(MapType, MapModifiers, IsImplicit,
                           /*AddPtrFlag=*/false,
                           /*AddIsTargetParamFlag=*/false);
        LB = BP;
        // NOTE(review): Size is only assigned when a component with an
        // associated declaration is found in the inner loop below; presumably
        // every overlapped component list has one - confirm.
        llvm::Value *Size = nullptr;
        // Do bitcopy of all non-overlapped structure elements.
        for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                 Component : OverlappedElements) {
          Address ComponentLB = Address::invalid();
          for (const OMPClauseMappableExprCommon::MappableComponent &MC :
               Component) {
            if (MC.getAssociatedDeclaration()) {
              ComponentLB =
                  CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                      .getAddress(CGF);
              // Bytes between the current lower bound and the start of the
              // overlapped component.
              Size = CGF.Builder.CreatePtrDiff(
                  CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                  CGF.EmitCastToVoidPtr(LB.getPointer()));
              break;
            }
          }
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                    /*isSigned=*/true));
          Types.push_back(Flags);
          // Continue right after the overlapped component.
          LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
        }
        // Final entry: from the end of the last overlapped component up to
        // one past HB.
        BasePointers.push_back(BP.getPointer());
        Pointers.push_back(LB.getPointer());
        Size = CGF.Builder.CreatePtrDiff(
            CGF.EmitCastToVoidPtr(
                CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
            CGF.EmitCastToVoidPtr(LB.getPointer()));
        Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
        Types.push_back(Flags);
        break;
      }
      llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
      if (!IsMemberPointer) {
        BasePointers.push_back(BP.getPointer());
        Pointers.push_back(LB.getPointer());
        Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        OpenMPOffloadMappingFlags Flags = getMapTypeBits(
            MapType, MapModifiers, IsImplicit,
            !IsExpressionFirstInfo || RequiresReference,
            IsCaptureFirstInfo && !RequiresReference);

        if (!IsExpressionFirstInfo) {
          // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
          // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
          if (IsPointer)
            Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                       OMP_MAP_DELETE | OMP_MAP_CLOSE);

          if (ShouldBeMemberOf) {
            // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
            // should be later updated with the correct value of MEMBER_OF.
            Flags |= OMP_MAP_MEMBER_OF;
            // From now on, all subsequent PTR_AND_OBJ entries should not be
            // marked as MEMBER_OF.
            ShouldBeMemberOf = false;
          }
        }

        Types.push_back(Flags);
      }

      // If we have encountered a member expression so far, keep track of the
      // mapped member. If the parent is "*this", then the value declaration
      // is nullptr.
      if (EncounteredME) {
        const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
        unsigned FieldIndex = FD->getFieldIndex();

        // Update info about the lowest and highest elements for this struct
        if (!PartialStruct.Base.isValid()) {
          PartialStruct.LowestElem = {FieldIndex, LB};
          PartialStruct.HighestElem = {FieldIndex, LB};
          PartialStruct.Base = BP;
        } else if (FieldIndex < PartialStruct.LowestElem.first) {
          PartialStruct.LowestElem = {FieldIndex, LB};
        } else if (FieldIndex > PartialStruct.HighestElem.first) {
          PartialStruct.HighestElem = {FieldIndex, LB};
        }
      }

      // If we have a final array section, we are done with this expression.
      if (IsFinalArraySection)
        break;

      // The pointer becomes the base for the next element.
      if (Next != CE)
        BP = LB;

      IsExpressionFirstInfo = false;
      IsCaptureFirstInfo = false;
    }
  }
}

/// Return the adjusted map modifiers if the declaration a capture refers to
/// appears in a first-private clause. This is expected to be used only with
/// directives that start with 'target'.
///
/// For a capture whose variable is not in FirstPrivateDecls the result is
/// TO | FROM.
MappableExprsHandler::OpenMPOffloadMappingFlags
getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
  assert(Cap.capturesVariable() && "Expected capture by reference only!");

  // A first private variable captured by reference will use only the
  // 'private ptr' and 'map to' flag. Return the right flags if the captured
  // declaration is known as first-private in this handler.
  if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
    // Constant-qualified variable captured by reference: ALWAYS | TO.
    if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
        Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
      return MappableExprsHandler::OMP_MAP_ALWAYS |
             MappableExprsHandler::OMP_MAP_TO;
    // Pointer-typed variable: TO | PTR_AND_OBJ.
    if (Cap.getCapturedVar()->getType()->isAnyPointerType())
      return MappableExprsHandler::OMP_MAP_TO |
             MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
    return MappableExprsHandler::OMP_MAP_PRIVATE |
           MappableExprsHandler::OMP_MAP_TO;
  }
  return MappableExprsHandler::OMP_MAP_TO |
         MappableExprsHandler::OMP_MAP_FROM;
}

/// Build the MEMBER_OF flag value for the entry at zero-based index
/// \a Position: the one-based index (Position + 1) placed at bit offset
/// getFlagMemberOffset().
static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
  // Shift left by getFlagMemberOffset() bits.
  return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                << getFlagMemberOffset());
}

/// Replace the contents of the MEMBER_OF field of \a Flags with
/// \a MemberOfFlag, except for PTR_AND_OBJ entries that do not carry the
/// MEMBER_OF placeholder, which are left untouched.
static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                   OpenMPOffloadMappingFlags MemberOfFlag) {
  // If the entry is PTR_AND_OBJ but has not been marked with the special
  // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
  // marked as MEMBER_OF.
  if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
      ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
    return;

  // Reset the placeholder value to prepare the flag for the assignment of the
  // proper MEMBER_OF value.
  Flags &= ~OMP_MAP_MEMBER_OF;
  Flags |= MemberOfFlag;
}

/// Append to \a Layout the fields of \a RD in the order of their LLVM struct
/// element index, recursing into non-empty base classes. Bit-fields and
/// zero-size fields are not added.
/// \param AsBase Use the base-subobject LLVM type of \a RD instead of its
/// complete-object LLVM type to size the element table.
void getPlainLayout(const CXXRecordDecl *RD,
                    llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                    bool AsBase) const {
  const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

  llvm::StructType *St =
      AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

  // One slot per LLVM struct element; each slot ends up holding a base class,
  // a field, or nothing.
  unsigned NumElements = St->getNumElements();
  llvm::SmallVector<
      llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
      RecordLayout(NumElements);

  // Fill bases.
  for (const auto &I : RD->bases()) {
    if (I.isVirtual())
      continue;
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty() || CGF.getContext()
                               .getASTRecordLayout(Base)
                               .getNonVirtualSize()
                               .isZero())
      continue;

    unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in virtual bases.
  for (const auto &I : RD->vbases()) {
    const auto *Base = I.getType()->getAsCXXRecordDecl();
    // Ignore empty bases.
    if (Base->isEmpty())
      continue;
    unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
    // Do not overwrite a slot that is already occupied.
    if (RecordLayout[FieldIndex])
      continue;
    RecordLayout[FieldIndex] = Base;
  }
  // Fill in all the fields.
  assert(!RD->isUnion() && "Unexpected union.");
  for (const auto *Field : RD->fields()) {
    // Fill in non-bitfields only. Slots that belong to bit-fields or
    // zero-size fields are never assigned in this function and are skipped
    // by the loop below.
    if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
      unsigned FieldIndex = RL.getLLVMFieldNo(Field);
      RecordLayout[FieldIndex] = Field;
    }
  }
  // Flatten: walk the slots in element order, expanding bases recursively.
  for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
           &Data : RecordLayout) {
    if (Data.isNull())
      continue;
    if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
      getPlainLayout(Base, Layout, /*AsBase=*/true);
    else
      Layout.push_back(Data.get<const FieldDecl *>());
  }
}

public:
/// Constructor for an executable directive. Records the variables listed in
/// the directive's firstprivate clauses (together with whether the clause is
/// implicit) and the component lists of its is_device_ptr clauses.
MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {
  // Extract firstprivate clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
    for (const auto *D : C->varlists())
      FirstPrivateDecls.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
  // Extract device pointer clause information.
  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
    for (auto L : C->component_lists())
      DevPointersMap[L.first].push_back(L.second);
}

/// Constructor for the declare mapper directive.
MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
    : CurDir(&Dir), CGF(CGF) {}

/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
8489 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 8490 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8491 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 8492 const StructRangeInfoTy &PartialStruct) const { 8493 // Base is the base of the struct 8494 BasePointers.push_back(PartialStruct.Base.getPointer()); 8495 // Pointer is the address of the lowest element 8496 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 8497 Pointers.push_back(LB); 8498 // Size is (addr of {highest+1} element) - (addr of lowest element) 8499 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 8500 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8501 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8502 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8503 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8504 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8505 /*isSigned=*/false); 8506 Sizes.push_back(Size); 8507 // Map type is always TARGET_PARAM 8508 Types.push_back(OMP_MAP_TARGET_PARAM); 8509 // Remove TARGET_PARAM flag from the first element 8510 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8511 8512 // All other current entries will be MEMBER_OF the combined entry 8513 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8514 // 0xFFFF in the MEMBER_OF field). 8515 OpenMPOffloadMappingFlags MemberOfFlag = 8516 getMemberOfFlag(BasePointers.size() - 1); 8517 for (auto &M : CurTypes) 8518 setCorrectMemberOfFlag(M, MemberOfFlag); 8519 } 8520 8521 /// Generate all the base pointers, section pointers, sizes and map 8522 /// types for the extracted mappable expressions. Also, for each item that 8523 /// relates with a device pointer, a pair of the relevant declaration and 8524 /// index where it occurs is appended to the device pointers info array. 
8525 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 8526 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8527 MapFlagsArrayTy &Types) const { 8528 // We have to process the component lists that relate with the same 8529 // declaration in a single chunk so that we can generate the map flags 8530 // correctly. Therefore, we organize all lists in a map. 8531 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8532 8533 // Helper function to fill the information map for the different supported 8534 // clauses. 8535 auto &&InfoGen = [&Info]( 8536 const ValueDecl *D, 8537 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8538 OpenMPMapClauseKind MapType, 8539 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8540 bool ReturnDevicePointer, bool IsImplicit) { 8541 const ValueDecl *VD = 8542 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8543 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8544 IsImplicit); 8545 }; 8546 8547 assert(CurDir.is<const OMPExecutableDirective *>() && 8548 "Expect a executable directive"); 8549 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8550 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 8551 for (const auto L : C->component_lists()) { 8552 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 8553 /*ReturnDevicePointer=*/false, C->isImplicit()); 8554 } 8555 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 8556 for (const auto L : C->component_lists()) { 8557 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 8558 /*ReturnDevicePointer=*/false, C->isImplicit()); 8559 } 8560 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 8561 for (const auto L : C->component_lists()) { 8562 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 8563 /*ReturnDevicePointer=*/false, C->isImplicit()); 8564 } 8565 8566 // Look at the use_device_ptr clause information and mark the existing map 8567 // 
entries as such. If there is no map information for an entry in the 8568 // use_device_ptr list, we create one with map type 'alloc' and zero size 8569 // section. It is the user fault if that was not mapped before. If there is 8570 // no map information and the pointer is a struct member, then we defer the 8571 // emission of that entry until the whole struct has been processed. 8572 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 8573 DeferredInfo; 8574 8575 for (const auto *C : 8576 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 8577 for (const auto L : C->component_lists()) { 8578 assert(!L.second.empty() && "Not expecting empty list of components!"); 8579 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 8580 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8581 const Expr *IE = L.second.back().getAssociatedExpression(); 8582 // If the first component is a member expression, we have to look into 8583 // 'this', which maps to null in the map of map information. Otherwise 8584 // look directly for the information. 8585 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8586 8587 // We potentially have map information for this declaration already. 8588 // Look for the first set of components that refer to it. 8589 if (It != Info.end()) { 8590 auto CI = std::find_if( 8591 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 8592 return MI.Components.back().getAssociatedDeclaration() == VD; 8593 }); 8594 // If we found a map entry, signal that the pointer has to be returned 8595 // and move on to the next declaration. 8596 if (CI != It->second.end()) { 8597 CI->ReturnDevicePointer = true; 8598 continue; 8599 } 8600 } 8601 8602 // We didn't find any match in our map information - generate a zero 8603 // size array section - if the pointer is a struct member we defer this 8604 // action until the whole struct has been processed. 
8605 if (isa<MemberExpr>(IE)) { 8606 // Insert the pointer into Info to be processed by 8607 // generateInfoForComponentList. Because it is a member pointer 8608 // without a pointee, no entry will be generated for it, therefore 8609 // we need to generate one after the whole struct has been processed. 8610 // Nonetheless, generateInfoForComponentList must be called to take 8611 // the pointer into account for the calculation of the range of the 8612 // partial struct. 8613 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8614 /*ReturnDevicePointer=*/false, C->isImplicit()); 8615 DeferredInfo[nullptr].emplace_back(IE, VD); 8616 } else { 8617 llvm::Value *Ptr = 8618 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8619 BasePointers.emplace_back(Ptr, VD); 8620 Pointers.push_back(Ptr); 8621 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8622 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8623 } 8624 } 8625 } 8626 8627 for (const auto &M : Info) { 8628 // We need to know when we generate information for the first component 8629 // associated with a capture, because the mapping flags depend on it. 8630 bool IsFirstComponentList = true; 8631 8632 // Temporary versions of arrays 8633 MapBaseValuesArrayTy CurBasePointers; 8634 MapValuesArrayTy CurPointers; 8635 MapValuesArrayTy CurSizes; 8636 MapFlagsArrayTy CurTypes; 8637 StructRangeInfoTy PartialStruct; 8638 8639 for (const MapInfo &L : M.second) { 8640 assert(!L.Components.empty() && 8641 "Not expecting declaration with no component lists."); 8642 8643 // Remember the current base pointer index. 8644 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8645 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8646 CurBasePointers, CurPointers, CurSizes, 8647 CurTypes, PartialStruct, 8648 IsFirstComponentList, L.IsImplicit); 8649 8650 // If this entry relates with a device pointer, set the relevant 8651 // declaration and add the 'return pointer' flag. 
8652 if (L.ReturnDevicePointer) { 8653 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8654 "Unexpected number of mapped base pointers."); 8655 8656 const ValueDecl *RelevantVD = 8657 L.Components.back().getAssociatedDeclaration(); 8658 assert(RelevantVD && 8659 "No relevant declaration related with device pointer??"); 8660 8661 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8662 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8663 } 8664 IsFirstComponentList = false; 8665 } 8666 8667 // Append any pending zero-length pointers which are struct members and 8668 // used with use_device_ptr. 8669 auto CI = DeferredInfo.find(M.first); 8670 if (CI != DeferredInfo.end()) { 8671 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8672 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8673 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8674 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8675 CurBasePointers.emplace_back(BasePtr, L.VD); 8676 CurPointers.push_back(Ptr); 8677 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8678 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8679 // value MEMBER_OF=FFFF so that the entry is later updated with the 8680 // correct value of MEMBER_OF. 8681 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8682 OMP_MAP_MEMBER_OF); 8683 } 8684 } 8685 8686 // If there is an entry in PartialStruct it means we have a struct with 8687 // individual members mapped. Emit an extra combined entry. 8688 if (PartialStruct.Base.isValid()) 8689 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8690 PartialStruct); 8691 8692 // We need to append the results of this capture to what we already have. 
// Closing statements of a function whose beginning lies before this point:
// append the entries produced for the current declaration to the output
// arrays, then close the per-declaration loop and the function body.
BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
Pointers.append(CurPointers.begin(), CurPointers.end());
Sizes.append(CurSizes.begin(), CurSizes.end());
Types.append(CurTypes.begin(), CurTypes.end());
}
}

/// Generate all the base pointers, section pointers, sizes and map types for
/// the extracted map clauses of user-defined mapper.
///
/// Requires that the current directive (CurDir) holds an
/// OMPDeclareMapperDecl; this is asserted.
///
/// \param BasePointers [out] Receives the base pointer of every generated
/// entry.
/// \param Pointers [out] Receives the section pointer of every generated
/// entry.
/// \param Sizes [out] Receives the size of every generated entry.
/// \param Types [out] Receives the map-type flags of every generated entry.
void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. Lists are keyed by the canonical declaration (or by nullptr when
  // no declaration is given), so all lists of one declaration end up together.
  auto &&InfoGen = [&Info](
      const ValueDecl *D,
      OMPClauseMappableExprCommon::MappableExprComponentListRef L,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit) {
    const ValueDecl *VD =
        D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
    Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                          IsImplicit);
  };

  // Collect the component lists of every clause attached to the mapper
  // declaration. Each clause is cast to OMPMapClause, so only map clauses are
  // expected here.
  for (const auto *C : CurMapperDir->clauselists()) {
    const auto *MC = cast<OMPMapClause>(C);
    for (const auto L : MC->component_lists()) {
      InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
              /*ReturnDevicePointer=*/false, MC->isImplicit());
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary versions of arrays: entries for this declaration are built
    // here first and appended to the output arrays at the end of the
    // iteration.
    MapBaseValuesArrayTy CurBasePointers;
    MapValuesArrayTy CurPointers;
    MapValuesArrayTy CurSizes;
    MapFlagsArrayTy CurTypes;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");
      generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                   CurBasePointers, CurPointers, CurSizes,
                                   CurTypes, PartialStruct,
                                   IsFirstComponentList, L.IsImplicit);
      IsFirstComponentList = false;
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                        PartialStruct);

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    Types.append(CurTypes.begin(), CurTypes.end());
  }
}

/// Emit capture info for lambdas for variables captured by reference.
///
/// Does nothing unless the canonical, non-reference type of \p VD is a lambda
/// class. Otherwise one entry is appended for a captured 'this' (if any) and
/// one for every variable capture that is either by reference or of pointer
/// type.
///
/// \param VD Declaration whose type is inspected.
/// \param Arg Value used as the address of the lambda object.
/// \param BasePointers [out] Receives the address of each capture field.
/// \param Pointers [out] Receives the pointer associated with each capture.
/// \param Sizes [out] Receives the size of each entry.
/// \param Types [out] Receives the flags of each entry; always
/// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT.
/// \param LambdaPointers [out] Maps each capture field address to the address
/// of the lambda object that contains it.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
    MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
    MapFlagsArrayTy &Types,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  // Only lambda objects are of interest here.
  if (!RD || !RD->isLambda())
    return;
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  // Look up the closure fields that store the captures.
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  if (ThisCapture) {
    // Entry for the captured 'this': sized as a void pointer.
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    BasePointers.push_back(ThisLVal.getPointer(CGF));
    Pointers.push_back(ThisLValVal.getPointer(CGF));
    Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    // Note: this local shadows the parameter VD; from here to the end of the
    // loop body VD names the captured variable, not the lambda object.
    const VarDecl *VD = LC.getCapturedVar();
    // By-copy captures are only handled when they are pointers.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the entry is sized as the referenced variable's
      // non-reference type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(VarLVal.getPointer(CGF));
      Pointers.push_back(VarLValVal.getPointer(CGF));
      Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer captured by copy: the loaded pointer value is used with a
      // zero size.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(VarLVal.getPointer(CGF));
      Pointers.push_back(VarRVal.getScalarVal());
      Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
}

/// Set correct indices for lambdas captures.
///
/// Visits every entry whose flags are exactly
/// PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT (the combination pushed by
/// generateInfoForLambdaCaptures) and rewrites its MEMBER_OF field to refer to
/// the entry of the lambda object that owns the capture.
///
/// \param LambdaPointers Map from capture field address to the address of the
/// owning lambda object.
/// \param BasePointers Base pointers of all entries.
/// \param Pointers Section pointers of all entries; searched for the owning
/// lambda object.
/// \param Types [in,out] Flags of all entries; updated in place.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    // Search backwards from the current entry for the closest earlier entry
    // whose pointer is the owning lambda object.
    int TgtIdx = -1;
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // Make this capture entry a MEMBER_OF the parent lambda entry found
    // above. (Presumably setCorrectMemberOfFlag replaces the placeholder
    // MEMBER_OF value; confirm against its definition.)
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
///
/// \param Cap Capture to process; must not capture a variable array type
/// (asserted).
/// \param Arg Value passed for the capture.
/// \param BasePointers [out] Receives the base pointers of generated entries.
/// \param Pointers [out] Receives the section pointers of generated entries.
/// \param Sizes [out] Receives the sizes of generated entries.
/// \param Types [out] Receives the map-type flags of generated entries.
/// \param PartialStruct [in,out] Forwarded to generateInfoForComponentList.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg,
                            MapBaseValuesArrayTy &BasePointers,
                            MapValuesArrayTy &Pointers,
                            MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // The canonical declaration of the captured variable; null when the capture
  // is 'this'.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    BasePointers.emplace_back(Arg, VD);
    Pointers.push_back(Arg);
    Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
    return;
  }

  // (component list, map type, map modifiers, is-implicit) for every map
  // clause component list that refers to VD.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    for (const auto L : C->decl_component_lists(VD)) {
      assert(L.first == VD &&
             "We got information for the wrong declaration??");
      assert(!L.second.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit());
    }
  }

  // Find overlapping elements (including the offset from the base element).
  // The map is keyed by the address of the "base" list inside
  // DeclComponentLists; the value collects the lists that overlap with it.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ++Count;
    // Compare L only against the lists that follow it, so each pair is
    // examined once.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      // Only Components1 is used below; the other tied values are scratch.
      std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
      // Walk both lists in reverse order for as long as the components agree.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head of
      // the components list.
      if (CI == CE || SI == SE) {
        assert((CI != CE || SI != SE) &&
               "Unexpected full match of the mapping components.");
        // The list that was exhausted is the base; the other one is recorded
        // as overlapping with it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  // NOTE(review): VD is null when the capture is 'this', and it is
  // dereferenced below whenever OverlappedData is non-empty; confirm that
  // overlapping lists cannot be produced for a 'this' capture.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    if (const auto *CRD =
            VD->getType().getCanonicalType()->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          // Skip the common leading part of both lists (walked in reverse
          // order).
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // The lists diverge at two fields: order by field index when both
          // belong to the same record, otherwise by which of the two appears
          // first in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          // NOTE(review): It is dereferenced without checking against
          // Layout.end(); this assumes at least one of the fields is always
          // present in Layout.
          const auto It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // We need to know when we are generating information for the first
  // component list associated with a capture, because the mapping flags
  // depend on it.
  // Go through all of the elements with the overlapped elements.
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    bool IsFirstComponentList = true;
    generateInfoForComponentList(MapType, MapModifiers, Components,
                                 BasePointers, Pointers, Sizes, Types,
                                 PartialStruct, IsFirstComponentList,
                                 IsImplicit, OverlappedComponents);
  }
  // Go through other elements without overlapped elements.
  // The first such list only counts as the first component list when no
  // overlapped base list was emitted above.
  bool IsFirstComponentList = OverlappedData.empty();
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit);
    IsFirstComponentList = false;
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated with the declare target link variables.
9045 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 9046 MapValuesArrayTy &Pointers, 9047 MapValuesArrayTy &Sizes, 9048 MapFlagsArrayTy &Types) const { 9049 assert(CurDir.is<const OMPExecutableDirective *>() && 9050 "Expect a executable directive"); 9051 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 9052 // Map other list items in the map clause which are not captured variables 9053 // but "declare target link" global variables. 9054 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 9055 for (const auto L : C->component_lists()) { 9056 if (!L.first) 9057 continue; 9058 const auto *VD = dyn_cast<VarDecl>(L.first); 9059 if (!VD) 9060 continue; 9061 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9062 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9063 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 9064 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 9065 continue; 9066 StructRangeInfoTy PartialStruct; 9067 generateInfoForComponentList( 9068 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 9069 Pointers, Sizes, Types, PartialStruct, 9070 /*IsFirstComponentList=*/true, C->isImplicit()); 9071 assert(!PartialStruct.Base.isValid() && 9072 "No partial structs for declare target link expected."); 9073 } 9074 } 9075 } 9076 9077 /// Generate the default map information for a given capture \a CI, 9078 /// record field declaration \a RI and captured value \a CV. 9079 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 9080 const FieldDecl &RI, llvm::Value *CV, 9081 MapBaseValuesArrayTy &CurBasePointers, 9082 MapValuesArrayTy &CurPointers, 9083 MapValuesArrayTy &CurSizes, 9084 MapFlagsArrayTy &CurMapTypes) const { 9085 bool IsImplicit = true; 9086 // Do the default mapping. 
9087 if (CI.capturesThis()) { 9088 CurBasePointers.push_back(CV); 9089 CurPointers.push_back(CV); 9090 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 9091 CurSizes.push_back( 9092 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 9093 CGF.Int64Ty, /*isSigned=*/true)); 9094 // Default map type. 9095 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 9096 } else if (CI.capturesVariableByCopy()) { 9097 CurBasePointers.push_back(CV); 9098 CurPointers.push_back(CV); 9099 if (!RI.getType()->isAnyPointerType()) { 9100 // We have to signal to the runtime captures passed by value that are 9101 // not pointers. 9102 CurMapTypes.push_back(OMP_MAP_LITERAL); 9103 CurSizes.push_back(CGF.Builder.CreateIntCast( 9104 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 9105 } else { 9106 // Pointers are implicitly mapped with a zero size and no flags 9107 // (other than first map that is added for all implicit maps). 9108 CurMapTypes.push_back(OMP_MAP_NONE); 9109 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 9110 } 9111 const VarDecl *VD = CI.getCapturedVar(); 9112 auto I = FirstPrivateDecls.find(VD); 9113 if (I != FirstPrivateDecls.end()) 9114 IsImplicit = I->getSecond(); 9115 } else { 9116 assert(CI.capturesVariable() && "Expected captured reference."); 9117 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 9118 QualType ElementType = PtrTy->getPointeeType(); 9119 CurSizes.push_back(CGF.Builder.CreateIntCast( 9120 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 9121 // The default map type for a scalar/complex type is 'to' because by 9122 // default the value doesn't have to be retrieved. For an aggregate 9123 // type, the default is 'tofrom'. 
9124 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 9125 const VarDecl *VD = CI.getCapturedVar(); 9126 auto I = FirstPrivateDecls.find(VD); 9127 if (I != FirstPrivateDecls.end() && 9128 VD->getType().isConstant(CGF.getContext())) { 9129 llvm::Constant *Addr = 9130 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 9131 // Copy the value of the original variable to the new global copy. 9132 CGF.Builder.CreateMemCpy( 9133 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 9134 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 9135 CurSizes.back(), /*IsVolatile=*/false); 9136 // Use new global variable as the base pointers. 9137 CurBasePointers.push_back(Addr); 9138 CurPointers.push_back(Addr); 9139 } else { 9140 CurBasePointers.push_back(CV); 9141 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 9142 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 9143 CV, ElementType, CGF.getContext().getDeclAlign(VD), 9144 AlignmentSource::Decl)); 9145 CurPointers.push_back(PtrAddr.getPointer()); 9146 } else { 9147 CurPointers.push_back(CV); 9148 } 9149 } 9150 if (I != FirstPrivateDecls.end()) 9151 IsImplicit = I->getSecond(); 9152 } 9153 // Every default map produces a single argument which is a target parameter. 9154 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 9155 9156 // Add flag stating this is an implicit map. 9157 if (IsImplicit) 9158 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 9159 } 9160 }; 9161 } // anonymous namespace 9162 9163 /// Emit the arrays used to pass the captures and map information to the 9164 /// offloading runtime library. If there is no map or capture information, 9165 /// return nullptr by reference. 
9166 static void 9167 emitOffloadingArrays(CodeGenFunction &CGF, 9168 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 9169 MappableExprsHandler::MapValuesArrayTy &Pointers, 9170 MappableExprsHandler::MapValuesArrayTy &Sizes, 9171 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 9172 CGOpenMPRuntime::TargetDataInfo &Info) { 9173 CodeGenModule &CGM = CGF.CGM; 9174 ASTContext &Ctx = CGF.getContext(); 9175 9176 // Reset the array information. 9177 Info.clearArrayInfo(); 9178 Info.NumberOfPtrs = BasePointers.size(); 9179 9180 if (Info.NumberOfPtrs) { 9181 // Detect if we have any capture size requiring runtime evaluation of the 9182 // size so that a constant array could be eventually used. 9183 bool hasRuntimeEvaluationCaptureSize = false; 9184 for (llvm::Value *S : Sizes) 9185 if (!isa<llvm::Constant>(S)) { 9186 hasRuntimeEvaluationCaptureSize = true; 9187 break; 9188 } 9189 9190 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 9191 QualType PointerArrayType = Ctx.getConstantArrayType( 9192 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 9193 /*IndexTypeQuals=*/0); 9194 9195 Info.BasePointersArray = 9196 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 9197 Info.PointersArray = 9198 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 9199 9200 // If we don't have any VLA types or other types that require runtime 9201 // evaluation, we can use a constant array for the map sizes, otherwise we 9202 // need to fill up the arrays as we do for the pointers. 
9203 QualType Int64Ty = 9204 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 9205 if (hasRuntimeEvaluationCaptureSize) { 9206 QualType SizeArrayType = Ctx.getConstantArrayType( 9207 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 9208 /*IndexTypeQuals=*/0); 9209 Info.SizesArray = 9210 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 9211 } else { 9212 // We expect all the sizes to be constant, so we collect them to create 9213 // a constant array. 9214 SmallVector<llvm::Constant *, 16> ConstSizes; 9215 for (llvm::Value *S : Sizes) 9216 ConstSizes.push_back(cast<llvm::Constant>(S)); 9217 9218 auto *SizesArrayInit = llvm::ConstantArray::get( 9219 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 9220 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 9221 auto *SizesArrayGbl = new llvm::GlobalVariable( 9222 CGM.getModule(), SizesArrayInit->getType(), 9223 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9224 SizesArrayInit, Name); 9225 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9226 Info.SizesArray = SizesArrayGbl; 9227 } 9228 9229 // The map types are always constant so we don't need to generate code to 9230 // fill arrays. Instead, we create an array constant. 
9231 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 9232 llvm::copy(MapTypes, Mapping.begin()); 9233 llvm::Constant *MapTypesArrayInit = 9234 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 9235 std::string MaptypesName = 9236 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 9237 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 9238 CGM.getModule(), MapTypesArrayInit->getType(), 9239 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 9240 MapTypesArrayInit, MaptypesName); 9241 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 9242 Info.MapTypesArray = MapTypesArrayGbl; 9243 9244 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 9245 llvm::Value *BPVal = *BasePointers[I]; 9246 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 9247 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9248 Info.BasePointersArray, 0, I); 9249 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9250 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9251 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9252 CGF.Builder.CreateStore(BPVal, BPAddr); 9253 9254 if (Info.requiresDevicePointerInfo()) 9255 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 9256 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 9257 9258 llvm::Value *PVal = Pointers[I]; 9259 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 9260 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9261 Info.PointersArray, 0, I); 9262 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 9263 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 9264 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 9265 CGF.Builder.CreateStore(PVal, PAddr); 9266 9267 if (hasRuntimeEvaluationCaptureSize) { 9268 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 9269 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9270 Info.SizesArray, 9271 /*Idx0=*/0, 9272 /*Idx1=*/I); 9273 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 9274 CGF.Builder.CreateStore( 9275 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 9276 SAddr); 9277 } 9278 } 9279 } 9280 } 9281 9282 /// Emit the arguments to be passed to the runtime library based on the 9283 /// arrays of pointers, sizes and map types. 9284 static void emitOffloadingArraysArgument( 9285 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 9286 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 9287 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 9288 CodeGenModule &CGM = CGF.CGM; 9289 if (Info.NumberOfPtrs) { 9290 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9291 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9292 Info.BasePointersArray, 9293 /*Idx0=*/0, /*Idx1=*/0); 9294 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9295 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 9296 Info.PointersArray, 9297 /*Idx0=*/0, 9298 /*Idx1=*/0); 9299 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9300 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9301 /*Idx0=*/0, /*Idx1=*/0); 9302 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9303 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9304 Info.MapTypesArray, 9305 /*Idx0=*/0, 9306 /*Idx1=*/0); 9307 } else { 9308 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9309 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9310 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9311 MapTypesArrayArg = 9312 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9313 } 9314 } 9315 9316 /// Check for inner distribute directive. 
9317 static const OMPExecutableDirective * 9318 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9319 const auto *CS = D.getInnermostCapturedStmt(); 9320 const auto *Body = 9321 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9322 const Stmt *ChildStmt = 9323 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9324 9325 if (const auto *NestedDir = 9326 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9327 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9328 switch (D.getDirectiveKind()) { 9329 case OMPD_target: 9330 if (isOpenMPDistributeDirective(DKind)) 9331 return NestedDir; 9332 if (DKind == OMPD_teams) { 9333 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9334 /*IgnoreCaptured=*/true); 9335 if (!Body) 9336 return nullptr; 9337 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9338 if (const auto *NND = 9339 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9340 DKind = NND->getDirectiveKind(); 9341 if (isOpenMPDistributeDirective(DKind)) 9342 return NND; 9343 } 9344 } 9345 return nullptr; 9346 case OMPD_target_teams: 9347 if (isOpenMPDistributeDirective(DKind)) 9348 return NestedDir; 9349 return nullptr; 9350 case OMPD_target_parallel: 9351 case OMPD_target_simd: 9352 case OMPD_target_parallel_for: 9353 case OMPD_target_parallel_for_simd: 9354 return nullptr; 9355 case OMPD_target_teams_distribute: 9356 case OMPD_target_teams_distribute_simd: 9357 case OMPD_target_teams_distribute_parallel_for: 9358 case OMPD_target_teams_distribute_parallel_for_simd: 9359 case OMPD_parallel: 9360 case OMPD_for: 9361 case OMPD_parallel_for: 9362 case OMPD_parallel_master: 9363 case OMPD_parallel_sections: 9364 case OMPD_for_simd: 9365 case OMPD_parallel_for_simd: 9366 case OMPD_cancel: 9367 case OMPD_cancellation_point: 9368 case OMPD_ordered: 9369 case OMPD_threadprivate: 9370 case OMPD_allocate: 9371 case OMPD_task: 9372 case OMPD_simd: 9373 case OMPD_sections: 9374 
case OMPD_section: 9375 case OMPD_single: 9376 case OMPD_master: 9377 case OMPD_critical: 9378 case OMPD_taskyield: 9379 case OMPD_barrier: 9380 case OMPD_taskwait: 9381 case OMPD_taskgroup: 9382 case OMPD_atomic: 9383 case OMPD_flush: 9384 case OMPD_depobj: 9385 case OMPD_scan: 9386 case OMPD_teams: 9387 case OMPD_target_data: 9388 case OMPD_target_exit_data: 9389 case OMPD_target_enter_data: 9390 case OMPD_distribute: 9391 case OMPD_distribute_simd: 9392 case OMPD_distribute_parallel_for: 9393 case OMPD_distribute_parallel_for_simd: 9394 case OMPD_teams_distribute: 9395 case OMPD_teams_distribute_simd: 9396 case OMPD_teams_distribute_parallel_for: 9397 case OMPD_teams_distribute_parallel_for_simd: 9398 case OMPD_target_update: 9399 case OMPD_declare_simd: 9400 case OMPD_declare_variant: 9401 case OMPD_begin_declare_variant: 9402 case OMPD_end_declare_variant: 9403 case OMPD_declare_target: 9404 case OMPD_end_declare_target: 9405 case OMPD_declare_reduction: 9406 case OMPD_declare_mapper: 9407 case OMPD_taskloop: 9408 case OMPD_taskloop_simd: 9409 case OMPD_master_taskloop: 9410 case OMPD_master_taskloop_simd: 9411 case OMPD_parallel_master_taskloop: 9412 case OMPD_parallel_master_taskloop_simd: 9413 case OMPD_requires: 9414 case OMPD_unknown: 9415 llvm_unreachable("Unexpected directive."); 9416 } 9417 } 9418 9419 return nullptr; 9420 } 9421 9422 /// Emit the user-defined mapper function. The code generation follows the 9423 /// pattern in the example below. 9424 /// \code 9425 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9426 /// void *base, void *begin, 9427 /// int64_t size, int64_t type) { 9428 /// // Allocate space for an array section first. 9429 /// if (size > 1 && !maptype.IsDelete) 9430 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9431 /// size*sizeof(Ty), clearToFrom(type)); 9432 /// // Map members. 
9433 /// for (unsigned i = 0; i < size; i++) { 9434 /// // For each component specified by this mapper: 9435 /// for (auto c : all_components) { 9436 /// if (c.hasMapper()) 9437 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9438 /// c.arg_type); 9439 /// else 9440 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9441 /// c.arg_begin, c.arg_size, c.arg_type); 9442 /// } 9443 /// } 9444 /// // Delete the array section. 9445 /// if (size > 1 && maptype.IsDelete) 9446 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9447 /// size*sizeof(Ty), clearToFrom(type)); 9448 /// } 9449 /// \endcode 9450 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9451 CodeGenFunction *CGF) { 9452 if (UDMMap.count(D) > 0) 9453 return; 9454 ASTContext &C = CGM.getContext(); 9455 QualType Ty = D->getType(); 9456 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9457 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9458 auto *MapperVarDecl = 9459 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9460 SourceLocation Loc = D->getLocation(); 9461 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9462 9463 // Prepare mapper function arguments and attributes. 
9464 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9465 C.VoidPtrTy, ImplicitParamDecl::Other); 9466 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9467 ImplicitParamDecl::Other); 9468 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9469 C.VoidPtrTy, ImplicitParamDecl::Other); 9470 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9471 ImplicitParamDecl::Other); 9472 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9473 ImplicitParamDecl::Other); 9474 FunctionArgList Args; 9475 Args.push_back(&HandleArg); 9476 Args.push_back(&BaseArg); 9477 Args.push_back(&BeginArg); 9478 Args.push_back(&SizeArg); 9479 Args.push_back(&TypeArg); 9480 const CGFunctionInfo &FnInfo = 9481 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9482 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9483 SmallString<64> TyStr; 9484 llvm::raw_svector_ostream Out(TyStr); 9485 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9486 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9487 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9488 Name, &CGM.getModule()); 9489 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9490 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9491 // Start the mapper function code generation. 9492 CodeGenFunction MapperCGF(CGM); 9493 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9494 // Compute the starting and end addreses of array elements. 
9495 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9496 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9497 C.getPointerType(Int64Ty), Loc); 9498 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9499 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 9500 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 9501 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 9502 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9503 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9504 C.getPointerType(Int64Ty), Loc); 9505 // Prepare common arguments for array initiation and deletion. 9506 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9507 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9508 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9509 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9510 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9511 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9512 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9513 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9514 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9515 9516 // Emit array initiation if this is an array section and \p MapType indicates 9517 // that memory allocation is required. 9518 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9519 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9520 ElementSize, HeadBB, /*IsInit=*/true); 9521 9522 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9523 9524 // Emit the loop header block. 9525 MapperCGF.EmitBlock(HeadBB); 9526 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9527 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9528 // Evaluate whether the initial condition is satisfied. 
9529 llvm::Value *IsEmpty = 9530 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9531 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9532 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9533 9534 // Emit the loop body block. 9535 MapperCGF.EmitBlock(BodyBB); 9536 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9537 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9538 PtrPHI->addIncoming(PtrBegin, EntryBB); 9539 Address PtrCurrent = 9540 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9541 .getAlignment() 9542 .alignmentOfArrayElement(ElementSize)); 9543 // Privatize the declared variable of mapper to be the current array element. 9544 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9545 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9546 return MapperCGF 9547 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9548 .getAddress(MapperCGF); 9549 }); 9550 (void)Scope.Privatize(); 9551 9552 // Get map clause information. Fill up the arrays with all mapped variables. 9553 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9554 MappableExprsHandler::MapValuesArrayTy Pointers; 9555 MappableExprsHandler::MapValuesArrayTy Sizes; 9556 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9557 MappableExprsHandler MEHandler(*D, MapperCGF); 9558 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 9559 9560 // Call the runtime API __tgt_mapper_num_components to get the number of 9561 // pre-existing components. 9562 llvm::Value *OffloadingArgs[] = {Handle}; 9563 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9564 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 9565 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9566 PreviousSize, 9567 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9568 9569 // Fill up the runtime mapper handle for all components. 
9570 for (unsigned I = 0; I < BasePointers.size(); ++I) { 9571 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9572 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9573 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9574 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9575 llvm::Value *CurSizeArg = Sizes[I]; 9576 9577 // Extract the MEMBER_OF field from the map type. 9578 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9579 MapperCGF.EmitBlock(MemberBB); 9580 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 9581 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9582 OriMapType, 9583 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9584 llvm::BasicBlock *MemberCombineBB = 9585 MapperCGF.createBasicBlock("omp.member.combine"); 9586 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9587 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9588 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9589 // Add the number of pre-existing components to the MEMBER_OF field if it 9590 // is valid. 9591 MapperCGF.EmitBlock(MemberCombineBB); 9592 llvm::Value *CombinedMember = 9593 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9594 // Do nothing if it is not a member of previous components. 9595 MapperCGF.EmitBlock(TypeBB); 9596 llvm::PHINode *MemberMapType = 9597 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9598 MemberMapType->addIncoming(OriMapType, MemberBB); 9599 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9600 9601 // Combine the map type inherited from user-defined mapper with that 9602 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9603 // bits of the \a MapType, which is the input argument of the mapper 9604 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9605 // bits of MemberMapType. 
9606 // [OpenMP 5.0], 1.2.6. map-type decay. 9607 // | alloc | to | from | tofrom | release | delete 9608 // ---------------------------------------------------------- 9609 // alloc | alloc | alloc | alloc | alloc | release | delete 9610 // to | alloc | to | alloc | to | release | delete 9611 // from | alloc | alloc | from | from | release | delete 9612 // tofrom | alloc | to | from | tofrom | release | delete 9613 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9614 MapType, 9615 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9616 MappableExprsHandler::OMP_MAP_FROM)); 9617 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9618 llvm::BasicBlock *AllocElseBB = 9619 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9620 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9621 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9622 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9623 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9624 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9625 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9626 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9627 MapperCGF.EmitBlock(AllocBB); 9628 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9629 MemberMapType, 9630 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9631 MappableExprsHandler::OMP_MAP_FROM))); 9632 MapperCGF.Builder.CreateBr(EndBB); 9633 MapperCGF.EmitBlock(AllocElseBB); 9634 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9635 LeftToFrom, 9636 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9637 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9638 // In case of to, clear OMP_MAP_FROM. 
9639 MapperCGF.EmitBlock(ToBB); 9640 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9641 MemberMapType, 9642 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9643 MapperCGF.Builder.CreateBr(EndBB); 9644 MapperCGF.EmitBlock(ToElseBB); 9645 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9646 LeftToFrom, 9647 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9648 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9649 // In case of from, clear OMP_MAP_TO. 9650 MapperCGF.EmitBlock(FromBB); 9651 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9652 MemberMapType, 9653 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9654 // In case of tofrom, do nothing. 9655 MapperCGF.EmitBlock(EndBB); 9656 llvm::PHINode *CurMapType = 9657 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9658 CurMapType->addIncoming(AllocMapType, AllocBB); 9659 CurMapType->addIncoming(ToMapType, ToBB); 9660 CurMapType->addIncoming(FromMapType, FromBB); 9661 CurMapType->addIncoming(MemberMapType, ToElseBB); 9662 9663 // TODO: call the corresponding mapper function if a user-defined mapper is 9664 // associated with this map clause. 9665 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9666 // data structure. 9667 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9668 CurSizeArg, CurMapType}; 9669 MapperCGF.EmitRuntimeCall( 9670 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9671 OffloadingArgs); 9672 } 9673 9674 // Update the pointer to point to the next element that needs to be mapped, 9675 // and check whether we have mapped all elements. 
9676 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9677 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9678 PtrPHI->addIncoming(PtrNext, BodyBB); 9679 llvm::Value *IsDone = 9680 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9681 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9682 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9683 9684 MapperCGF.EmitBlock(ExitBB); 9685 // Emit array deletion if this is an array section and \p MapType indicates 9686 // that deletion is required. 9687 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9688 ElementSize, DoneBB, /*IsInit=*/false); 9689 9690 // Emit the function exit block. 9691 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9692 MapperCGF.FinishFunction(); 9693 UDMMap.try_emplace(D, Fn); 9694 if (CGF) { 9695 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9696 Decls.second.push_back(D); 9697 } 9698 } 9699 9700 /// Emit the array initialization or deletion portion for user-defined mapper 9701 /// code generation. First, it evaluates whether an array section is mapped and 9702 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9703 /// true, and \a MapType indicates to not delete this array, array 9704 /// initialization code is generated. If \a IsInit is false, and \a MapType 9705 /// indicates to not this array, array deletion code is generated. 9706 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9707 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9708 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9709 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9710 StringRef Prefix = IsInit ? ".init" : ".del"; 9711 9712 // Evaluate if this is an array section. 
llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Sizes below 1 skip straight to ExitBB.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  // For initialization the body runs when the delete bit is clear; for
  // deletion it runs when the delete bit is set.
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  // No branch to ExitBB is emitted here; the insertion point is left in BodyBB
  // for the caller.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
}

/// Emit a call to the runtime entry OMPRTL__kmpc_push_target_tripcount for the
/// loop associated with target directive \p D.
///
/// If \p D is not itself both a teams and a distribute directive, the nested
/// distribute directive is looked up with getNestedDistributeDirective();
/// nothing is emitted when none is found. The iteration count is produced by
/// \p SizeEmitter, and the runtime call is only emitted when that callback
/// returns a non-null value. The call receives \p DeviceID and the iteration
/// count and is generated inside an inlined OMPD_unknown region.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *Args[] = {DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the host-side code that launches the target region of directive \p D.
///
/// Does nothing when \p CGF has no insertion point. When \p OutlinedFnID is
/// null only the host version (\p OutlinedFn) is called. Otherwise the
/// offloading arrays are built and one of the __tgt_target* runtime entries is
/// called, falling back to \p OutlinedFn when the runtime call returns a
/// non-zero value. If \p IfCond is given, emitIfClause() selects between the
/// offloading path and the host-only path. Directives with a 'depend' clause
/// are routed through CodeGenFunction::EmitOMPTargetTaskBasedDirective().
///
/// \param Device Optional device expression together with its clause modifier.
/// \param SizeEmitter Callback forwarded to emitTargetNumIterationsCall().
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Populate CapturedVars once up front; the lambdas below capture the vector
  // by reference and regenerate it when RequiresOuterTask is set.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen and read by
  // ThenGen, which captures both by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value is treated as an offloading failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Build the mapping information for every capture, materialize the
  // offloading arrays, and then run ThenGen either inline or inside a task.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Captures, captured-record fields and captured values are walked in
    // lockstep (CI, RI and CV are advanced together).
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the results to the enclosing function's InputInfo and
    // MapTypesArray so that ThenGen can read them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen inline, or inside a task when a 'depend'
  // clause is present.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively scan \p S for target execution directives and emit the device
/// function of each one that has a target region entry registered under
/// \p ParentName. A null \p S is ignored.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
10077 bool RequiresDeviceCodegen = 10078 isa<OMPExecutableDirective>(S) && 10079 isOpenMPTargetExecutionDirective( 10080 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 10081 10082 if (RequiresDeviceCodegen) { 10083 const auto &E = *cast<OMPExecutableDirective>(S); 10084 unsigned DeviceID; 10085 unsigned FileID; 10086 unsigned Line; 10087 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 10088 FileID, Line); 10089 10090 // Is this a target region that should not be emitted as an entry point? If 10091 // so just signal we are done with this target region. 10092 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 10093 ParentName, Line)) 10094 return; 10095 10096 switch (E.getDirectiveKind()) { 10097 case OMPD_target: 10098 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 10099 cast<OMPTargetDirective>(E)); 10100 break; 10101 case OMPD_target_parallel: 10102 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 10103 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 10104 break; 10105 case OMPD_target_teams: 10106 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 10107 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 10108 break; 10109 case OMPD_target_teams_distribute: 10110 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 10111 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 10112 break; 10113 case OMPD_target_teams_distribute_simd: 10114 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 10115 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 10116 break; 10117 case OMPD_target_parallel_for: 10118 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 10119 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 10120 break; 10121 case OMPD_target_parallel_for_simd: 10122 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 10123 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 10124 break; 10125 case 
OMPD_target_simd: 10126 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 10127 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 10128 break; 10129 case OMPD_target_teams_distribute_parallel_for: 10130 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 10131 CGM, ParentName, 10132 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 10133 break; 10134 case OMPD_target_teams_distribute_parallel_for_simd: 10135 CodeGenFunction:: 10136 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 10137 CGM, ParentName, 10138 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 10139 break; 10140 case OMPD_parallel: 10141 case OMPD_for: 10142 case OMPD_parallel_for: 10143 case OMPD_parallel_master: 10144 case OMPD_parallel_sections: 10145 case OMPD_for_simd: 10146 case OMPD_parallel_for_simd: 10147 case OMPD_cancel: 10148 case OMPD_cancellation_point: 10149 case OMPD_ordered: 10150 case OMPD_threadprivate: 10151 case OMPD_allocate: 10152 case OMPD_task: 10153 case OMPD_simd: 10154 case OMPD_sections: 10155 case OMPD_section: 10156 case OMPD_single: 10157 case OMPD_master: 10158 case OMPD_critical: 10159 case OMPD_taskyield: 10160 case OMPD_barrier: 10161 case OMPD_taskwait: 10162 case OMPD_taskgroup: 10163 case OMPD_atomic: 10164 case OMPD_flush: 10165 case OMPD_depobj: 10166 case OMPD_scan: 10167 case OMPD_teams: 10168 case OMPD_target_data: 10169 case OMPD_target_exit_data: 10170 case OMPD_target_enter_data: 10171 case OMPD_distribute: 10172 case OMPD_distribute_simd: 10173 case OMPD_distribute_parallel_for: 10174 case OMPD_distribute_parallel_for_simd: 10175 case OMPD_teams_distribute: 10176 case OMPD_teams_distribute_simd: 10177 case OMPD_teams_distribute_parallel_for: 10178 case OMPD_teams_distribute_parallel_for_simd: 10179 case OMPD_target_update: 10180 case OMPD_declare_simd: 10181 case OMPD_declare_variant: 10182 case OMPD_begin_declare_variant: 10183 case OMPD_end_declare_variant: 10184 case OMPD_declare_target: 10185 case 
OMPD_end_declare_target: 10186 case OMPD_declare_reduction: 10187 case OMPD_declare_mapper: 10188 case OMPD_taskloop: 10189 case OMPD_taskloop_simd: 10190 case OMPD_master_taskloop: 10191 case OMPD_master_taskloop_simd: 10192 case OMPD_parallel_master_taskloop: 10193 case OMPD_parallel_master_taskloop_simd: 10194 case OMPD_requires: 10195 case OMPD_unknown: 10196 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 10197 } 10198 return; 10199 } 10200 10201 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 10202 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 10203 return; 10204 10205 scanForTargetRegionsFunctions( 10206 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 10207 return; 10208 } 10209 10210 // If this is a lambda function, look into its body. 10211 if (const auto *L = dyn_cast<LambdaExpr>(S)) 10212 S = L->getBody(); 10213 10214 // Keep looking for target regions recursively. 10215 for (const Stmt *II : S->children()) 10216 scanForTargetRegionsFunctions(II, ParentName); 10217 } 10218 10219 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 10220 // If emitting code for the host, we do not process FD here. Instead we do 10221 // the normal code generation. 10222 if (!CGM.getLangOpts().OpenMPIsDevice) { 10223 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 10224 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10225 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10226 // Do not emit device_type(nohost) functions for the host. 10227 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 10228 return true; 10229 } 10230 return false; 10231 } 10232 10233 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 10234 // Try to detect target regions in the function. 
10235 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10236 StringRef Name = CGM.getMangledName(GD); 10237 scanForTargetRegionsFunctions(FD->getBody(), Name); 10238 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10239 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10240 // Do not emit device_type(nohost) functions for the host. 10241 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10242 return true; 10243 } 10244 10245 // Do not to emit function if it is not marked as declare target. 10246 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10247 AlreadyEmittedTargetDecls.count(VD) == 0; 10248 } 10249 10250 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10251 if (!CGM.getLangOpts().OpenMPIsDevice) 10252 return false; 10253 10254 // Check if there are Ctors/Dtors in this declaration and look for target 10255 // regions in it. We use the complete variant to produce the kernel name 10256 // mangling. 10257 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10258 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10259 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10260 StringRef ParentName = 10261 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10262 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10263 } 10264 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10265 StringRef ParentName = 10266 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10267 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10268 } 10269 } 10270 10271 // Do not to emit variable if it is not marked as declare target. 
10272 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10273 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10274 cast<VarDecl>(GD.getDecl())); 10275 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10276 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10277 HasRequiresUnifiedSharedMemory)) { 10278 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10279 return true; 10280 } 10281 return false; 10282 } 10283 10284 llvm::Constant * 10285 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10286 const VarDecl *VD) { 10287 assert(VD->getType().isConstant(CGM.getContext()) && 10288 "Expected constant variable."); 10289 StringRef VarName; 10290 llvm::Constant *Addr; 10291 llvm::GlobalValue::LinkageTypes Linkage; 10292 QualType Ty = VD->getType(); 10293 SmallString<128> Buffer; 10294 { 10295 unsigned DeviceID; 10296 unsigned FileID; 10297 unsigned Line; 10298 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10299 FileID, Line); 10300 llvm::raw_svector_ostream OS(Buffer); 10301 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10302 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10303 VarName = OS.str(); 10304 } 10305 Linkage = llvm::GlobalValue::InternalLinkage; 10306 Addr = 10307 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10308 getDefaultFirstprivateAddressSpace()); 10309 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10310 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10311 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10312 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10313 VarName, Addr, VarSize, 10314 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10315 return Addr; 10316 } 10317 10318 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10319 llvm::Constant *Addr) { 10320 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10321 
!CGM.getLangOpts().OpenMPIsDevice) 10322 return; 10323 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10324 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10325 if (!Res) { 10326 if (CGM.getLangOpts().OpenMPIsDevice) { 10327 // Register non-target variables being emitted in device code (debug info 10328 // may cause this). 10329 StringRef VarName = CGM.getMangledName(VD); 10330 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10331 } 10332 return; 10333 } 10334 // Register declare target variables. 10335 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10336 StringRef VarName; 10337 CharUnits VarSize; 10338 llvm::GlobalValue::LinkageTypes Linkage; 10339 10340 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10341 !HasRequiresUnifiedSharedMemory) { 10342 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10343 VarName = CGM.getMangledName(VD); 10344 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10345 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10346 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10347 } else { 10348 VarSize = CharUnits::Zero(); 10349 } 10350 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10351 // Temp solution to prevent optimizations of the internal variables. 
10352 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10353 std::string RefName = getName({VarName, "ref"}); 10354 if (!CGM.GetGlobalValue(RefName)) { 10355 llvm::Constant *AddrRef = 10356 getOrCreateInternalVariable(Addr->getType(), RefName); 10357 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10358 GVAddrRef->setConstant(/*Val=*/true); 10359 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10360 GVAddrRef->setInitializer(Addr); 10361 CGM.addCompilerUsedGlobal(GVAddrRef); 10362 } 10363 } 10364 } else { 10365 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10366 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10367 HasRequiresUnifiedSharedMemory)) && 10368 "Declare target attribute must link or to with unified memory."); 10369 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10370 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10371 else 10372 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10373 10374 if (CGM.getLangOpts().OpenMPIsDevice) { 10375 VarName = Addr->getName(); 10376 Addr = nullptr; 10377 } else { 10378 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10379 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10380 } 10381 VarSize = CGM.getPointerSize(); 10382 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10383 } 10384 10385 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10386 VarName, Addr, VarSize, Flags, Linkage); 10387 } 10388 10389 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10390 if (isa<FunctionDecl>(GD.getDecl()) || 10391 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10392 return emitTargetFunctions(GD); 10393 10394 return emitTargetGlobalVariable(GD); 10395 } 10396 10397 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10398 for (const VarDecl *VD : DeferredGlobalVariables) { 10399 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10400 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10401 if (!Res) 
10402 continue; 10403 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10404 !HasRequiresUnifiedSharedMemory) { 10405 CGM.EmitGlobal(VD); 10406 } else { 10407 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10408 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10409 HasRequiresUnifiedSharedMemory)) && 10410 "Expected link clause or to clause with unified memory."); 10411 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10412 } 10413 } 10414 } 10415 10416 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10417 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10418 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10419 " Expected target-based directive."); 10420 } 10421 10422 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10423 for (const OMPClause *Clause : D->clauselists()) { 10424 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10425 HasRequiresUnifiedSharedMemory = true; 10426 } else if (const auto *AC = 10427 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10428 switch (AC->getAtomicDefaultMemOrderKind()) { 10429 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10430 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10431 break; 10432 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10433 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10434 break; 10435 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10436 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10437 break; 10438 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10439 break; 10440 } 10441 } 10442 } 10443 } 10444 10445 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10446 return RequiresAtomicOrdering; 10447 } 10448 10449 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10450 LangAS &AS) { 10451 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10452 return false; 10453 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10454 
switch(A->getAllocatorType()) { 10455 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10456 // Not supported, fallback to the default mem space. 10457 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10458 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10459 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10460 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10461 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10462 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10463 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10464 AS = LangAS::Default; 10465 return true; 10466 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10467 llvm_unreachable("Expected predefined allocator for the variables with the " 10468 "static storage."); 10469 } 10470 return false; 10471 } 10472 10473 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10474 return HasRequiresUnifiedSharedMemory; 10475 } 10476 10477 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10478 CodeGenModule &CGM) 10479 : CGM(CGM) { 10480 if (CGM.getLangOpts().OpenMPIsDevice) { 10481 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10482 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10483 } 10484 } 10485 10486 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10487 if (CGM.getLangOpts().OpenMPIsDevice) 10488 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10489 } 10490 10491 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10492 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10493 return true; 10494 10495 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10496 // Do not to emit function if it is marked as declare target as it was already 10497 // emitted. 
10498 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10499 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10500 if (auto *F = dyn_cast_or_null<llvm::Function>( 10501 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10502 return !F->isDeclaration(); 10503 return false; 10504 } 10505 return true; 10506 } 10507 10508 return !AlreadyEmittedTargetDecls.insert(D).second; 10509 } 10510 10511 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10512 // If we don't have entries or if we are emitting code for the device, we 10513 // don't need to do anything. 10514 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10515 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10516 (OffloadEntriesInfoManager.empty() && 10517 !HasEmittedDeclareTargetRegion && 10518 !HasEmittedTargetRegion)) 10519 return nullptr; 10520 10521 // Create and register the function that handles the requires directives. 10522 ASTContext &C = CGM.getContext(); 10523 10524 llvm::Function *RequiresRegFn; 10525 { 10526 CodeGenFunction CGF(CGM); 10527 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10528 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10529 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10530 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 10531 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10532 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10533 // TODO: check for other requires clauses. 10534 // The requires directive takes effect only when a target region is 10535 // present in the compilation unit. Otherwise it is ignored and not 10536 // passed to the runtime. This avoids the runtime from throwing an error 10537 // for mismatching requires clauses across compilation units that don't 10538 // contain at least 1 target region. 
10539 assert((HasEmittedTargetRegion || 10540 HasEmittedDeclareTargetRegion || 10541 !OffloadEntriesInfoManager.empty()) && 10542 "Target or declare target region expected."); 10543 if (HasRequiresUnifiedSharedMemory) 10544 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10545 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 10546 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10547 CGF.FinishFunction(); 10548 } 10549 return RequiresRegFn; 10550 } 10551 10552 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10553 const OMPExecutableDirective &D, 10554 SourceLocation Loc, 10555 llvm::Function *OutlinedFn, 10556 ArrayRef<llvm::Value *> CapturedVars) { 10557 if (!CGF.HaveInsertPoint()) 10558 return; 10559 10560 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10561 CodeGenFunction::RunCleanupsScope Scope(CGF); 10562 10563 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10564 llvm::Value *Args[] = { 10565 RTLoc, 10566 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10567 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10568 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10569 RealArgs.append(std::begin(Args), std::end(Args)); 10570 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10571 10572 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 10573 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10574 } 10575 10576 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10577 const Expr *NumTeams, 10578 const Expr *ThreadLimit, 10579 SourceLocation Loc) { 10580 if (!CGF.HaveInsertPoint()) 10581 return; 10582 10583 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10584 10585 llvm::Value *NumTeamsVal = 10586 NumTeams 10587 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10588 CGF.CGM.Int32Ty, /* isSigned = */ true) 10589 : CGF.Builder.getInt32(0); 10590 10591 llvm::Value *ThreadLimitVal = 10592 ThreadLimit 10593 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10594 CGF.CGM.Int32Ty, /* isSigned = */ true) 10595 : CGF.Builder.getInt32(0); 10596 10597 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10598 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10599 ThreadLimitVal}; 10600 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 10601 PushNumTeamsArgs); 10602 } 10603 10604 void CGOpenMPRuntime::emitTargetDataCalls( 10605 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10606 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10607 if (!CGF.HaveInsertPoint()) 10608 return; 10609 10610 // Action used to replace the default codegen action and turn privatization 10611 // off. 10612 PrePostActionTy NoPrivAction; 10613 10614 // Generate the code for the opening of the data environment. Capture all the 10615 // arguments of the runtime call by reference because they are used in the 10616 // closing of the region. 10617 auto &&BeginThenGen = [this, &D, Device, &Info, 10618 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10619 // Fill up the arrays with all the mapped variables. 10620 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10621 MappableExprsHandler::MapValuesArrayTy Pointers; 10622 MappableExprsHandler::MapValuesArrayTy Sizes; 10623 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10624 10625 // Get map clause information. 10626 MappableExprsHandler MCHandler(D, CGF); 10627 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10628 10629 // Fill up the arrays and create the arguments. 
10630 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10631 10632 llvm::Value *BasePointersArrayArg = nullptr; 10633 llvm::Value *PointersArrayArg = nullptr; 10634 llvm::Value *SizesArrayArg = nullptr; 10635 llvm::Value *MapTypesArrayArg = nullptr; 10636 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10637 SizesArrayArg, MapTypesArrayArg, Info); 10638 10639 // Emit device ID if any. 10640 llvm::Value *DeviceID = nullptr; 10641 if (Device) { 10642 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10643 CGF.Int64Ty, /*isSigned=*/true); 10644 } else { 10645 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10646 } 10647 10648 // Emit the number of elements in the offloading arrays. 10649 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10650 10651 llvm::Value *OffloadingArgs[] = { 10652 DeviceID, PointerNum, BasePointersArrayArg, 10653 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10654 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10655 OffloadingArgs); 10656 10657 // If device pointer privatization is required, emit the body of the region 10658 // here. It will have to be duplicated: with and without privatization. 10659 if (!Info.CaptureDeviceAddrMap.empty()) 10660 CodeGen(CGF); 10661 }; 10662 10663 // Generate code for the closing of the data region. 10664 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10665 PrePostActionTy &) { 10666 assert(Info.isValid() && "Invalid data environment closing arguments."); 10667 10668 llvm::Value *BasePointersArrayArg = nullptr; 10669 llvm::Value *PointersArrayArg = nullptr; 10670 llvm::Value *SizesArrayArg = nullptr; 10671 llvm::Value *MapTypesArrayArg = nullptr; 10672 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10673 SizesArrayArg, MapTypesArrayArg, Info); 10674 10675 // Emit device ID if any. 
10676 llvm::Value *DeviceID = nullptr; 10677 if (Device) { 10678 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10679 CGF.Int64Ty, /*isSigned=*/true); 10680 } else { 10681 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10682 } 10683 10684 // Emit the number of elements in the offloading arrays. 10685 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10686 10687 llvm::Value *OffloadingArgs[] = { 10688 DeviceID, PointerNum, BasePointersArrayArg, 10689 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10690 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10691 OffloadingArgs); 10692 }; 10693 10694 // If we need device pointer privatization, we need to emit the body of the 10695 // region with no privatization in the 'else' branch of the conditional. 10696 // Otherwise, we don't have to do anything. 10697 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10698 PrePostActionTy &) { 10699 if (!Info.CaptureDeviceAddrMap.empty()) { 10700 CodeGen.setAction(NoPrivAction); 10701 CodeGen(CGF); 10702 } 10703 }; 10704 10705 // We don't have to do anything to close the region if the if clause evaluates 10706 // to false. 10707 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10708 10709 if (IfCond) { 10710 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10711 } else { 10712 RegionCodeGenTy RCG(BeginThenGen); 10713 RCG(CGF); 10714 } 10715 10716 // If we don't require privatization of device pointers, we emit the body in 10717 // between the runtime calls. This avoids duplicating the body code. 
10718 if (Info.CaptureDeviceAddrMap.empty()) { 10719 CodeGen.setAction(NoPrivAction); 10720 CodeGen(CGF); 10721 } 10722 10723 if (IfCond) { 10724 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10725 } else { 10726 RegionCodeGenTy RCG(EndThenGen); 10727 RCG(CGF); 10728 } 10729 } 10730 10731 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10732 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10733 const Expr *Device) { 10734 if (!CGF.HaveInsertPoint()) 10735 return; 10736 10737 assert((isa<OMPTargetEnterDataDirective>(D) || 10738 isa<OMPTargetExitDataDirective>(D) || 10739 isa<OMPTargetUpdateDirective>(D)) && 10740 "Expecting either target enter, exit data, or update directives."); 10741 10742 CodeGenFunction::OMPTargetDataInfo InputInfo; 10743 llvm::Value *MapTypesArray = nullptr; 10744 // Generate the code for the opening of the data environment. 10745 auto &&ThenGen = [this, &D, Device, &InputInfo, 10746 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10747 // Emit device ID if any. 10748 llvm::Value *DeviceID = nullptr; 10749 if (Device) { 10750 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10751 CGF.Int64Ty, /*isSigned=*/true); 10752 } else { 10753 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10754 } 10755 10756 // Emit the number of elements in the offloading arrays. 10757 llvm::Constant *PointerNum = 10758 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10759 10760 llvm::Value *OffloadingArgs[] = {DeviceID, 10761 PointerNum, 10762 InputInfo.BasePointersArray.getPointer(), 10763 InputInfo.PointersArray.getPointer(), 10764 InputInfo.SizesArray.getPointer(), 10765 MapTypesArray}; 10766 10767 // Select the right runtime function call for each expected standalone 10768 // directive. 10769 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10770 OpenMPRTLFunction RTLFn; 10771 switch (D.getDirectiveKind()) { 10772 case OMPD_target_enter_data: 10773 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_begin_nowait 10774 : OMPRTL__tgt_target_data_begin; 10775 break; 10776 case OMPD_target_exit_data: 10777 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10778 : OMPRTL__tgt_target_data_end; 10779 break; 10780 case OMPD_target_update: 10781 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10782 : OMPRTL__tgt_target_data_update; 10783 break; 10784 case OMPD_parallel: 10785 case OMPD_for: 10786 case OMPD_parallel_for: 10787 case OMPD_parallel_master: 10788 case OMPD_parallel_sections: 10789 case OMPD_for_simd: 10790 case OMPD_parallel_for_simd: 10791 case OMPD_cancel: 10792 case OMPD_cancellation_point: 10793 case OMPD_ordered: 10794 case OMPD_threadprivate: 10795 case OMPD_allocate: 10796 case OMPD_task: 10797 case OMPD_simd: 10798 case OMPD_sections: 10799 case OMPD_section: 10800 case OMPD_single: 10801 case OMPD_master: 10802 case OMPD_critical: 10803 case OMPD_taskyield: 10804 case OMPD_barrier: 10805 case OMPD_taskwait: 10806 case OMPD_taskgroup: 10807 case OMPD_atomic: 10808 case OMPD_flush: 10809 case OMPD_depobj: 10810 case OMPD_scan: 10811 case OMPD_teams: 10812 case OMPD_target_data: 10813 case OMPD_distribute: 10814 case OMPD_distribute_simd: 10815 case OMPD_distribute_parallel_for: 10816 case OMPD_distribute_parallel_for_simd: 10817 case OMPD_teams_distribute: 10818 case OMPD_teams_distribute_simd: 10819 case OMPD_teams_distribute_parallel_for: 10820 case OMPD_teams_distribute_parallel_for_simd: 10821 case OMPD_declare_simd: 10822 case OMPD_declare_variant: 10823 case OMPD_begin_declare_variant: 10824 case OMPD_end_declare_variant: 10825 case OMPD_declare_target: 10826 case OMPD_end_declare_target: 10827 case OMPD_declare_reduction: 10828 case OMPD_declare_mapper: 10829 case OMPD_taskloop: 10830 case OMPD_taskloop_simd: 10831 case OMPD_master_taskloop: 10832 case OMPD_master_taskloop_simd: 10833 case OMPD_parallel_master_taskloop: 10834 case OMPD_parallel_master_taskloop_simd: 10835 case OMPD_target: 10836 case 
OMPD_target_simd: 10837 case OMPD_target_teams_distribute: 10838 case OMPD_target_teams_distribute_simd: 10839 case OMPD_target_teams_distribute_parallel_for: 10840 case OMPD_target_teams_distribute_parallel_for_simd: 10841 case OMPD_target_teams: 10842 case OMPD_target_parallel: 10843 case OMPD_target_parallel_for: 10844 case OMPD_target_parallel_for_simd: 10845 case OMPD_requires: 10846 case OMPD_unknown: 10847 llvm_unreachable("Unexpected standalone target data directive."); 10848 break; 10849 } 10850 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10851 }; 10852 10853 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10854 CodeGenFunction &CGF, PrePostActionTy &) { 10855 // Fill up the arrays with all the mapped variables. 10856 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10857 MappableExprsHandler::MapValuesArrayTy Pointers; 10858 MappableExprsHandler::MapValuesArrayTy Sizes; 10859 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10860 10861 // Get map clause information. 10862 MappableExprsHandler MEHandler(D, CGF); 10863 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10864 10865 TargetDataInfo Info; 10866 // Fill up the arrays and create the arguments. 
10867 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10868 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10869 Info.PointersArray, Info.SizesArray, 10870 Info.MapTypesArray, Info); 10871 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10872 InputInfo.BasePointersArray = 10873 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10874 InputInfo.PointersArray = 10875 Address(Info.PointersArray, CGM.getPointerAlign()); 10876 InputInfo.SizesArray = 10877 Address(Info.SizesArray, CGM.getPointerAlign()); 10878 MapTypesArray = Info.MapTypesArray; 10879 if (D.hasClausesOfKind<OMPDependClause>()) 10880 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10881 else 10882 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10883 }; 10884 10885 if (IfCond) { 10886 emitIfClause(CGF, IfCond, TargetThenGen, 10887 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10888 } else { 10889 RegionCodeGenTy ThenRCG(TargetThenGen); 10890 ThenRCG(CGF); 10891 } 10892 } 10893 10894 namespace { 10895 /// Kind of parameter in a function with 'declare simd' directive. 10896 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10897 /// Attribute set of the parameter. 10898 struct ParamAttrTy { 10899 ParamKindTy Kind = Vector; 10900 llvm::APSInt StrideOrArg; 10901 llvm::APSInt Alignment; 10902 }; 10903 } // namespace 10904 10905 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10906 ArrayRef<ParamAttrTy> ParamAttrs) { 10907 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10908 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10909 // of that clause. The VLEN value must be power of 2. 10910 // In other case the notion of the function`s "characteristic data type" (CDT) 10911 // is used to compute the vector length. 10912 // CDT is defined in the following order: 10913 // a) For non-void function, the CDT is the return type. 
10914 // b) If the function has any non-uniform, non-linear parameters, then the 10915 // CDT is the type of the first such parameter. 10916 // c) If the CDT determined by a) or b) above is struct, union, or class 10917 // type which is pass-by-value (except for the type that maps to the 10918 // built-in complex data type), the characteristic data type is int. 10919 // d) If none of the above three cases is applicable, the CDT is int. 10920 // The VLEN is then determined based on the CDT and the size of vector 10921 // register of that ISA for which current vector version is generated. The 10922 // VLEN is computed using the formula below: 10923 // VLEN = sizeof(vector_register) / sizeof(CDT), 10924 // where vector register size specified in section 3.2.1 Registers and the 10925 // Stack Frame of original AMD64 ABI document. 10926 QualType RetType = FD->getReturnType(); 10927 if (RetType.isNull()) 10928 return 0; 10929 ASTContext &C = FD->getASTContext(); 10930 QualType CDT; 10931 if (!RetType.isNull() && !RetType->isVoidType()) { 10932 CDT = RetType; 10933 } else { 10934 unsigned Offset = 0; 10935 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10936 if (ParamAttrs[Offset].Kind == Vector) 10937 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10938 ++Offset; 10939 } 10940 if (CDT.isNull()) { 10941 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10942 if (ParamAttrs[I + Offset].Kind == Vector) { 10943 CDT = FD->getParamDecl(I)->getType(); 10944 break; 10945 } 10946 } 10947 } 10948 } 10949 if (CDT.isNull()) 10950 CDT = C.IntTy; 10951 CDT = CDT->getCanonicalTypeUnqualified(); 10952 if (CDT->isRecordType() || CDT->isUnionType()) 10953 CDT = C.IntTy; 10954 return C.getTypeSize(CDT); 10955 } 10956 10957 static void 10958 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10959 const llvm::APSInt &VLENVal, 10960 ArrayRef<ParamAttrTy> ParamAttrs, 10961 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10962 struct ISADataTy { 
10963 char ISA; 10964 unsigned VecRegSize; 10965 }; 10966 ISADataTy ISAData[] = { 10967 { 10968 'b', 128 10969 }, // SSE 10970 { 10971 'c', 256 10972 }, // AVX 10973 { 10974 'd', 256 10975 }, // AVX2 10976 { 10977 'e', 512 10978 }, // AVX512 10979 }; 10980 llvm::SmallVector<char, 2> Masked; 10981 switch (State) { 10982 case OMPDeclareSimdDeclAttr::BS_Undefined: 10983 Masked.push_back('N'); 10984 Masked.push_back('M'); 10985 break; 10986 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10987 Masked.push_back('N'); 10988 break; 10989 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10990 Masked.push_back('M'); 10991 break; 10992 } 10993 for (char Mask : Masked) { 10994 for (const ISADataTy &Data : ISAData) { 10995 SmallString<256> Buffer; 10996 llvm::raw_svector_ostream Out(Buffer); 10997 Out << "_ZGV" << Data.ISA << Mask; 10998 if (!VLENVal) { 10999 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 11000 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 11001 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 11002 } else { 11003 Out << VLENVal; 11004 } 11005 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 11006 switch (ParamAttr.Kind){ 11007 case LinearWithVarStride: 11008 Out << 's' << ParamAttr.StrideOrArg; 11009 break; 11010 case Linear: 11011 Out << 'l'; 11012 if (!!ParamAttr.StrideOrArg) 11013 Out << ParamAttr.StrideOrArg; 11014 break; 11015 case Uniform: 11016 Out << 'u'; 11017 break; 11018 case Vector: 11019 Out << 'v'; 11020 break; 11021 } 11022 if (!!ParamAttr.Alignment) 11023 Out << 'a' << ParamAttr.Alignment; 11024 } 11025 Out << '_' << Fn->getName(); 11026 Fn->addFnAttr(Out.str()); 11027 } 11028 } 11029 } 11030 11031 // This are the Functions that are needed to mangle the name of the 11032 // vector functions generated by the compiler, according to the rules 11033 // defined in the "Vector Function ABI specifications for AArch64", 11034 // available at 11035 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 11036 11037 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 11038 /// 11039 /// TODO: Need to implement the behavior for reference marked with a 11040 /// var or no linear modifiers (1.b in the section). For this, we 11041 /// need to extend ParamKindTy to support the linear modifiers. 11042 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 11043 QT = QT.getCanonicalType(); 11044 11045 if (QT->isVoidType()) 11046 return false; 11047 11048 if (Kind == ParamKindTy::Uniform) 11049 return false; 11050 11051 if (Kind == ParamKindTy::Linear) 11052 return false; 11053 11054 // TODO: Handle linear references with modifiers 11055 11056 if (Kind == ParamKindTy::LinearWithVarStride) 11057 return false; 11058 11059 return true; 11060 } 11061 11062 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 11063 static bool getAArch64PBV(QualType QT, ASTContext &C) { 11064 QT = QT.getCanonicalType(); 11065 unsigned Size = C.getTypeSize(QT); 11066 11067 // Only scalars and complex within 16 bytes wide set PVB to true. 11068 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 11069 return false; 11070 11071 if (QT->isFloatingType()) 11072 return true; 11073 11074 if (QT->isIntegerType()) 11075 return true; 11076 11077 if (QT->isPointerType()) 11078 return true; 11079 11080 // TODO: Add support for complex types (section 3.1.2, item 2). 11081 11082 return false; 11083 } 11084 11085 /// Computes the lane size (LS) of a return type or of an input parameter, 11086 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 11087 /// TODO: Add support for references, section 3.2.1, item 1. 
11088 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 11089 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 11090 QualType PTy = QT.getCanonicalType()->getPointeeType(); 11091 if (getAArch64PBV(PTy, C)) 11092 return C.getTypeSize(PTy); 11093 } 11094 if (getAArch64PBV(QT, C)) 11095 return C.getTypeSize(QT); 11096 11097 return C.getTypeSize(C.getUIntPtrType()); 11098 } 11099 11100 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 11101 // signature of the scalar function, as defined in 3.2.2 of the 11102 // AAVFABI. 11103 static std::tuple<unsigned, unsigned, bool> 11104 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 11105 QualType RetType = FD->getReturnType().getCanonicalType(); 11106 11107 ASTContext &C = FD->getASTContext(); 11108 11109 bool OutputBecomesInput = false; 11110 11111 llvm::SmallVector<unsigned, 8> Sizes; 11112 if (!RetType->isVoidType()) { 11113 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 11114 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 11115 OutputBecomesInput = true; 11116 } 11117 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 11118 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 11119 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 11120 } 11121 11122 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 11123 // The LS of a function parameter / return value can only be a power 11124 // of 2, starting from 8 bits, up to 128. 
11125 assert(std::all_of(Sizes.begin(), Sizes.end(), 11126 [](unsigned Size) { 11127 return Size == 8 || Size == 16 || Size == 32 || 11128 Size == 64 || Size == 128; 11129 }) && 11130 "Invalid size"); 11131 11132 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 11133 *std::max_element(std::begin(Sizes), std::end(Sizes)), 11134 OutputBecomesInput); 11135 } 11136 11137 /// Mangle the parameter part of the vector function name according to 11138 /// their OpenMP classification. The mangling function is defined in 11139 /// section 3.5 of the AAVFABI. 11140 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 11141 SmallString<256> Buffer; 11142 llvm::raw_svector_ostream Out(Buffer); 11143 for (const auto &ParamAttr : ParamAttrs) { 11144 switch (ParamAttr.Kind) { 11145 case LinearWithVarStride: 11146 Out << "ls" << ParamAttr.StrideOrArg; 11147 break; 11148 case Linear: 11149 Out << 'l'; 11150 // Don't print the step value if it is not present or if it is 11151 // equal to 1. 11152 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 11153 Out << ParamAttr.StrideOrArg; 11154 break; 11155 case Uniform: 11156 Out << 'u'; 11157 break; 11158 case Vector: 11159 Out << 'v'; 11160 break; 11161 } 11162 11163 if (!!ParamAttr.Alignment) 11164 Out << 'a' << ParamAttr.Alignment; 11165 } 11166 11167 return std::string(Out.str()); 11168 } 11169 11170 // Function used to add the attribute. The parameter `VLEN` is 11171 // templated to allow the use of "x" when targeting scalable functions 11172 // for SVE. 
11173 template <typename T> 11174 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11175 char ISA, StringRef ParSeq, 11176 StringRef MangledName, bool OutputBecomesInput, 11177 llvm::Function *Fn) { 11178 SmallString<256> Buffer; 11179 llvm::raw_svector_ostream Out(Buffer); 11180 Out << Prefix << ISA << LMask << VLEN; 11181 if (OutputBecomesInput) 11182 Out << "v"; 11183 Out << ParSeq << "_" << MangledName; 11184 Fn->addFnAttr(Out.str()); 11185 } 11186 11187 // Helper function to generate the Advanced SIMD names depending on 11188 // the value of the NDS when simdlen is not present. 11189 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11190 StringRef Prefix, char ISA, 11191 StringRef ParSeq, StringRef MangledName, 11192 bool OutputBecomesInput, 11193 llvm::Function *Fn) { 11194 switch (NDS) { 11195 case 8: 11196 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11197 OutputBecomesInput, Fn); 11198 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11199 OutputBecomesInput, Fn); 11200 break; 11201 case 16: 11202 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11203 OutputBecomesInput, Fn); 11204 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11205 OutputBecomesInput, Fn); 11206 break; 11207 case 32: 11208 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11209 OutputBecomesInput, Fn); 11210 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11211 OutputBecomesInput, Fn); 11212 break; 11213 case 64: 11214 case 128: 11215 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11216 OutputBecomesInput, Fn); 11217 break; 11218 default: 11219 llvm_unreachable("Scalar type is too wide."); 11220 } 11221 } 11222 11223 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
11224 static void emitAArch64DeclareSimdFunction( 11225 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 11226 ArrayRef<ParamAttrTy> ParamAttrs, 11227 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 11228 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 11229 11230 // Get basic data for building the vector signature. 11231 const auto Data = getNDSWDS(FD, ParamAttrs); 11232 const unsigned NDS = std::get<0>(Data); 11233 const unsigned WDS = std::get<1>(Data); 11234 const bool OutputBecomesInput = std::get<2>(Data); 11235 11236 // Check the values provided via `simdlen` by the user. 11237 // 1. A `simdlen(1)` doesn't produce vector signatures, 11238 if (UserVLEN == 1) { 11239 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11240 DiagnosticsEngine::Warning, 11241 "The clause simdlen(1) has no effect when targeting aarch64."); 11242 CGM.getDiags().Report(SLoc, DiagID); 11243 return; 11244 } 11245 11246 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 11247 // Advanced SIMD output. 11248 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 11249 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11250 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 11251 "power of 2 when targeting Advanced SIMD."); 11252 CGM.getDiags().Report(SLoc, DiagID); 11253 return; 11254 } 11255 11256 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 11257 // limits. 11258 if (ISA == 's' && UserVLEN != 0) { 11259 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 11260 unsigned DiagID = CGM.getDiags().getCustomDiagID( 11261 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 11262 "lanes in the architectural constraints " 11263 "for SVE (min is 128-bit, max is " 11264 "2048-bit, by steps of 128-bit)"); 11265 CGM.getDiags().Report(SLoc, DiagID) << WDS; 11266 return; 11267 } 11268 } 11269 11270 // Sort out parameter sequence. 
11271 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 11272 StringRef Prefix = "_ZGV"; 11273 // Generate simdlen from user input (if any). 11274 if (UserVLEN) { 11275 if (ISA == 's') { 11276 // SVE generates only a masked function. 11277 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11278 OutputBecomesInput, Fn); 11279 } else { 11280 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11281 // Advanced SIMD generates one or two functions, depending on 11282 // the `[not]inbranch` clause. 11283 switch (State) { 11284 case OMPDeclareSimdDeclAttr::BS_Undefined: 11285 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11286 OutputBecomesInput, Fn); 11287 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11288 OutputBecomesInput, Fn); 11289 break; 11290 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11291 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 11292 OutputBecomesInput, Fn); 11293 break; 11294 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11295 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 11296 OutputBecomesInput, Fn); 11297 break; 11298 } 11299 } 11300 } else { 11301 // If no user simdlen is provided, follow the AAVFABI rules for 11302 // generating the vector length. 11303 if (ISA == 's') { 11304 // SVE, section 3.4.1, item 1. 11305 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11306 OutputBecomesInput, Fn); 11307 } else { 11308 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11309 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11310 // two vector names depending on the use of the clause 11311 // `[not]inbranch`. 
11312 switch (State) { 11313 case OMPDeclareSimdDeclAttr::BS_Undefined: 11314 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11315 OutputBecomesInput, Fn); 11316 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11317 OutputBecomesInput, Fn); 11318 break; 11319 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11320 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11321 OutputBecomesInput, Fn); 11322 break; 11323 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11324 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11325 OutputBecomesInput, Fn); 11326 break; 11327 } 11328 } 11329 } 11330 } 11331 11332 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11333 llvm::Function *Fn) { 11334 ASTContext &C = CGM.getContext(); 11335 FD = FD->getMostRecentDecl(); 11336 // Map params to their positions in function decl. 11337 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11338 if (isa<CXXMethodDecl>(FD)) 11339 ParamPositions.try_emplace(FD, 0); 11340 unsigned ParamPos = ParamPositions.size(); 11341 for (const ParmVarDecl *P : FD->parameters()) { 11342 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11343 ++ParamPos; 11344 } 11345 while (FD) { 11346 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11347 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11348 // Mark uniform parameters. 11349 for (const Expr *E : Attr->uniforms()) { 11350 E = E->IgnoreParenImpCasts(); 11351 unsigned Pos; 11352 if (isa<CXXThisExpr>(E)) { 11353 Pos = ParamPositions[FD]; 11354 } else { 11355 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11356 ->getCanonicalDecl(); 11357 Pos = ParamPositions[PVD]; 11358 } 11359 ParamAttrs[Pos].Kind = Uniform; 11360 } 11361 // Get alignment info. 
11362 auto NI = Attr->alignments_begin(); 11363 for (const Expr *E : Attr->aligneds()) { 11364 E = E->IgnoreParenImpCasts(); 11365 unsigned Pos; 11366 QualType ParmTy; 11367 if (isa<CXXThisExpr>(E)) { 11368 Pos = ParamPositions[FD]; 11369 ParmTy = E->getType(); 11370 } else { 11371 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11372 ->getCanonicalDecl(); 11373 Pos = ParamPositions[PVD]; 11374 ParmTy = PVD->getType(); 11375 } 11376 ParamAttrs[Pos].Alignment = 11377 (*NI) 11378 ? (*NI)->EvaluateKnownConstInt(C) 11379 : llvm::APSInt::getUnsigned( 11380 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11381 .getQuantity()); 11382 ++NI; 11383 } 11384 // Mark linear parameters. 11385 auto SI = Attr->steps_begin(); 11386 auto MI = Attr->modifiers_begin(); 11387 for (const Expr *E : Attr->linears()) { 11388 E = E->IgnoreParenImpCasts(); 11389 unsigned Pos; 11390 if (isa<CXXThisExpr>(E)) { 11391 Pos = ParamPositions[FD]; 11392 } else { 11393 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11394 ->getCanonicalDecl(); 11395 Pos = ParamPositions[PVD]; 11396 } 11397 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11398 ParamAttr.Kind = Linear; 11399 if (*SI) { 11400 Expr::EvalResult Result; 11401 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11402 if (const auto *DRE = 11403 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11404 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11405 ParamAttr.Kind = LinearWithVarStride; 11406 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11407 ParamPositions[StridePVD->getCanonicalDecl()]); 11408 } 11409 } 11410 } else { 11411 ParamAttr.StrideOrArg = Result.Val.getInt(); 11412 } 11413 } 11414 ++SI; 11415 ++MI; 11416 } 11417 llvm::APSInt VLENVal; 11418 SourceLocation ExprLoc; 11419 const Expr *VLENExpr = Attr->getSimdlen(); 11420 if (VLENExpr) { 11421 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11422 ExprLoc = VLENExpr->getExprLoc(); 11423 } 11424 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11425 if (CGM.getTriple().isX86()) { 11426 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11427 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11428 unsigned VLEN = VLENVal.getExtValue(); 11429 StringRef MangledName = Fn->getName(); 11430 if (CGM.getTarget().hasFeature("sve")) 11431 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11432 MangledName, 's', 128, Fn, ExprLoc); 11433 if (CGM.getTarget().hasFeature("neon")) 11434 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11435 MangledName, 'n', 128, Fn, ExprLoc); 11436 } 11437 } 11438 FD = FD->getPreviousDecl(); 11439 } 11440 } 11441 11442 namespace { 11443 /// Cleanup action for doacross support. 11444 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11445 public: 11446 static const int DoacrossFinArgs = 2; 11447 11448 private: 11449 llvm::FunctionCallee RTLFn; 11450 llvm::Value *Args[DoacrossFinArgs]; 11451 11452 public: 11453 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11454 ArrayRef<llvm::Value *> CallArgs) 11455 : RTLFn(RTLFn) { 11456 assert(CallArgs.size() == DoacrossFinArgs); 11457 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11458 } 11459 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11460 if (!CGF.HaveInsertPoint()) 11461 return; 11462 CGF.EmitRuntimeCall(RTLFn, Args); 11463 } 11464 }; 11465 } // namespace 11466 11467 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11468 const OMPLoopDirective &D, 11469 ArrayRef<Expr *> NumIterations) { 11470 if (!CGF.HaveInsertPoint()) 11471 return; 11472 11473 ASTContext &C = CGM.getContext(); 11474 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11475 RecordDecl *RD; 11476 if (KmpDimTy.isNull()) { 11477 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11478 // kmp_int64 lo; // lower 11479 // kmp_int64 up; // upper 11480 // 
kmp_int64 st; // stride 11481 // }; 11482 RD = C.buildImplicitRecord("kmp_dim"); 11483 RD->startDefinition(); 11484 addFieldToRecordDecl(C, RD, Int64Ty); 11485 addFieldToRecordDecl(C, RD, Int64Ty); 11486 addFieldToRecordDecl(C, RD, Int64Ty); 11487 RD->completeDefinition(); 11488 KmpDimTy = C.getRecordType(RD); 11489 } else { 11490 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11491 } 11492 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11493 QualType ArrayTy = 11494 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11495 11496 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11497 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11498 enum { LowerFD = 0, UpperFD, StrideFD }; 11499 // Fill dims with data. 11500 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11501 LValue DimsLVal = CGF.MakeAddrLValue( 11502 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11503 // dims.upper = num_iterations; 11504 LValue UpperLVal = CGF.EmitLValueForField( 11505 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11506 llvm::Value *NumIterVal = CGF.EmitScalarConversion( 11507 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(), 11508 Int64Ty, NumIterations[I]->getExprLoc()); 11509 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11510 // dims.stride = 1; 11511 LValue StrideLVal = CGF.EmitLValueForField( 11512 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11513 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11514 StrideLVal); 11515 } 11516 11517 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11518 // kmp_int32 num_dims, struct kmp_dim * dims); 11519 llvm::Value *Args[] = { 11520 emitUpdateLocation(CGF, D.getBeginLoc()), 11521 getThreadID(CGF, D.getBeginLoc()), 11522 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11523 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11524 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 11525 
CGM.VoidPtrTy)}; 11526 11527 llvm::FunctionCallee RTLFn = 11528 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 11529 CGF.EmitRuntimeCall(RTLFn, Args); 11530 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11531 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11532 llvm::FunctionCallee FiniRTLFn = 11533 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 11534 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11535 llvm::makeArrayRef(FiniArgs)); 11536 } 11537 11538 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11539 const OMPDependClause *C) { 11540 QualType Int64Ty = 11541 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11542 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11543 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11544 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11545 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11546 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11547 const Expr *CounterVal = C->getLoopData(I); 11548 assert(CounterVal); 11549 llvm::Value *CntVal = CGF.EmitScalarConversion( 11550 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11551 CounterVal->getExprLoc()); 11552 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11553 /*Volatile=*/false, Int64Ty); 11554 } 11555 llvm::Value *Args[] = { 11556 emitUpdateLocation(CGF, C->getBeginLoc()), 11557 getThreadID(CGF, C->getBeginLoc()), 11558 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11559 llvm::FunctionCallee RTLFn; 11560 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11561 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 11562 } else { 11563 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11564 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 11565 } 11566 CGF.EmitRuntimeCall(RTLFn, Args); 11567 } 11568 11569 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, 
SourceLocation Loc, 11570 llvm::FunctionCallee Callee, 11571 ArrayRef<llvm::Value *> Args) const { 11572 assert(Loc.isValid() && "Outlined function call location must be valid."); 11573 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11574 11575 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11576 if (Fn->doesNotThrow()) { 11577 CGF.EmitNounwindRuntimeCall(Fn, Args); 11578 return; 11579 } 11580 } 11581 CGF.EmitRuntimeCall(Callee, Args); 11582 } 11583 11584 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11585 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 11586 ArrayRef<llvm::Value *> Args) const { 11587 emitCall(CGF, Loc, OutlinedFn, Args); 11588 } 11589 11590 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11591 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11592 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11593 HasEmittedDeclareTargetRegion = true; 11594 } 11595 11596 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11597 const VarDecl *NativeParam, 11598 const VarDecl *TargetParam) const { 11599 return CGF.GetAddrOfLocalVar(NativeParam); 11600 } 11601 11602 namespace { 11603 /// Cleanup action for allocate support. 
11604 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11605 public: 11606 static const int CleanupArgs = 3; 11607 11608 private: 11609 llvm::FunctionCallee RTLFn; 11610 llvm::Value *Args[CleanupArgs]; 11611 11612 public: 11613 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11614 ArrayRef<llvm::Value *> CallArgs) 11615 : RTLFn(RTLFn) { 11616 assert(CallArgs.size() == CleanupArgs && 11617 "Size of arguments does not match."); 11618 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11619 } 11620 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11621 if (!CGF.HaveInsertPoint()) 11622 return; 11623 CGF.EmitRuntimeCall(RTLFn, Args); 11624 } 11625 }; 11626 } // namespace 11627 11628 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11629 const VarDecl *VD) { 11630 if (!VD) 11631 return Address::invalid(); 11632 const VarDecl *CVD = VD->getCanonicalDecl(); 11633 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11634 return Address::invalid(); 11635 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11636 // Use the default allocation. 
11637 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 11638 !AA->getAllocator()) 11639 return Address::invalid(); 11640 llvm::Value *Size; 11641 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11642 if (CVD->getType()->isVariablyModifiedType()) { 11643 Size = CGF.getTypeSize(CVD->getType()); 11644 // Align the size: ((size + align - 1) / align) * align 11645 Size = CGF.Builder.CreateNUWAdd( 11646 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11647 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11648 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11649 } else { 11650 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11651 Size = CGM.getSize(Sz.alignTo(Align)); 11652 } 11653 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11654 assert(AA->getAllocator() && 11655 "Expected allocator expression for non-default allocator."); 11656 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11657 // According to the standard, the original allocator type is a enum (integer). 11658 // Convert to pointer type, if required. 
11659 if (Allocator->getType()->isIntegerTy()) 11660 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11661 else if (Allocator->getType()->isPointerTy()) 11662 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11663 CGM.VoidPtrTy); 11664 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11665 11666 llvm::Value *Addr = 11667 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11668 getName({CVD->getName(), ".void.addr"})); 11669 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11670 Allocator}; 11671 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11672 11673 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11674 llvm::makeArrayRef(FiniArgs)); 11675 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11676 Addr, 11677 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11678 getName({CVD->getName(), ".addr"})); 11679 return Address(Addr, Align); 11680 } 11681 11682 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11683 CodeGenModule &CGM, const OMPLoopDirective &S) 11684 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11685 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11686 if (!NeedToPush) 11687 return; 11688 NontemporalDeclsSet &DS = 11689 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11690 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11691 for (const Stmt *Ref : C->private_refs()) { 11692 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11693 const ValueDecl *VD; 11694 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11695 VD = DRE->getDecl(); 11696 } else { 11697 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11698 assert((ME->isImplicitCXXThis() || 11699 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11700 "Expected member of current class."); 11701 VD = 
ME->getMemberDecl(); 11702 } 11703 DS.insert(VD); 11704 } 11705 } 11706 } 11707 11708 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11709 if (!NeedToPush) 11710 return; 11711 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11712 } 11713 11714 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11715 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11716 11717 return llvm::any_of( 11718 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11719 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11720 } 11721 11722 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11723 const OMPExecutableDirective &S, 11724 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11725 const { 11726 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11727 // Vars in target/task regions must be excluded completely. 11728 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11729 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11730 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11731 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11732 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11733 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11734 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11735 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11736 } 11737 } 11738 // Exclude vars in private clauses. 
11739 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11740 for (const Expr *Ref : C->varlists()) { 11741 if (!Ref->getType()->isScalarType()) 11742 continue; 11743 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11744 if (!DRE) 11745 continue; 11746 NeedToCheckForLPCs.insert(DRE->getDecl()); 11747 } 11748 } 11749 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11750 for (const Expr *Ref : C->varlists()) { 11751 if (!Ref->getType()->isScalarType()) 11752 continue; 11753 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11754 if (!DRE) 11755 continue; 11756 NeedToCheckForLPCs.insert(DRE->getDecl()); 11757 } 11758 } 11759 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11760 for (const Expr *Ref : C->varlists()) { 11761 if (!Ref->getType()->isScalarType()) 11762 continue; 11763 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11764 if (!DRE) 11765 continue; 11766 NeedToCheckForLPCs.insert(DRE->getDecl()); 11767 } 11768 } 11769 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11770 for (const Expr *Ref : C->varlists()) { 11771 if (!Ref->getType()->isScalarType()) 11772 continue; 11773 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11774 if (!DRE) 11775 continue; 11776 NeedToCheckForLPCs.insert(DRE->getDecl()); 11777 } 11778 } 11779 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11780 for (const Expr *Ref : C->varlists()) { 11781 if (!Ref->getType()->isScalarType()) 11782 continue; 11783 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11784 if (!DRE) 11785 continue; 11786 NeedToCheckForLPCs.insert(DRE->getDecl()); 11787 } 11788 } 11789 for (const Decl *VD : NeedToCheckForLPCs) { 11790 for (const LastprivateConditionalData &Data : 11791 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11792 if (Data.DeclToUniqueName.count(VD) > 0) { 11793 if (!Data.Disabled) 11794 
NeedToAddForLPCsAsDisabled.insert(VD); 11795 break; 11796 } 11797 } 11798 } 11799 } 11800 11801 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11802 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11803 : CGM(CGF.CGM), 11804 Action((CGM.getLangOpts().OpenMP >= 50 && 11805 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11806 [](const OMPLastprivateClause *C) { 11807 return C->getKind() == 11808 OMPC_LASTPRIVATE_conditional; 11809 })) 11810 ? ActionToDo::PushAsLastprivateConditional 11811 : ActionToDo::DoNotPush) { 11812 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11813 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11814 return; 11815 assert(Action == ActionToDo::PushAsLastprivateConditional && 11816 "Expected a push action."); 11817 LastprivateConditionalData &Data = 11818 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11819 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11820 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11821 continue; 11822 11823 for (const Expr *Ref : C->varlists()) { 11824 Data.DeclToUniqueName.insert(std::make_pair( 11825 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11826 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11827 } 11828 } 11829 Data.IVLVal = IVLVal; 11830 Data.Fn = CGF.CurFn; 11831 } 11832 11833 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11834 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11835 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11836 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11837 if (CGM.getLangOpts().OpenMP < 50) 11838 return; 11839 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11840 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11841 if (!NeedToAddForLPCsAsDisabled.empty()) { 11842 Action = ActionToDo::DisableLastprivateConditional; 11843 
LastprivateConditionalData &Data = 11844 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11845 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11846 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11847 Data.Fn = CGF.CurFn; 11848 Data.Disabled = true; 11849 } 11850 } 11851 11852 CGOpenMPRuntime::LastprivateConditionalRAII 11853 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11854 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11855 return LastprivateConditionalRAII(CGF, S); 11856 } 11857 11858 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11859 if (CGM.getLangOpts().OpenMP < 50) 11860 return; 11861 if (Action == ActionToDo::DisableLastprivateConditional) { 11862 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11863 "Expected list of disabled private vars."); 11864 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11865 } 11866 if (Action == ActionToDo::PushAsLastprivateConditional) { 11867 assert( 11868 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11869 "Expected list of lastprivate conditional vars."); 11870 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11871 } 11872 } 11873 11874 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11875 const VarDecl *VD) { 11876 ASTContext &C = CGM.getContext(); 11877 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11878 if (I == LastprivateConditionalToTypes.end()) 11879 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11880 QualType NewType; 11881 const FieldDecl *VDField; 11882 const FieldDecl *FiredField; 11883 LValue BaseLVal; 11884 auto VI = I->getSecond().find(VD); 11885 if (VI == I->getSecond().end()) { 11886 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 11887 RD->startDefinition(); 11888 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11889 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 11890 RD->completeDefinition(); 11891 NewType = C.getRecordType(RD); 11892 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11893 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11894 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11895 } else { 11896 NewType = std::get<0>(VI->getSecond()); 11897 VDField = std::get<1>(VI->getSecond()); 11898 FiredField = std::get<2>(VI->getSecond()); 11899 BaseLVal = std::get<3>(VI->getSecond()); 11900 } 11901 LValue FiredLVal = 11902 CGF.EmitLValueForField(BaseLVal, FiredField); 11903 CGF.EmitStoreOfScalar( 11904 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11905 FiredLVal); 11906 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11907 } 11908 11909 namespace { 11910 /// Checks if the lastprivate conditional variable is referenced in LHS. 11911 class LastprivateConditionalRefChecker final 11912 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11913 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11914 const Expr *FoundE = nullptr; 11915 const Decl *FoundD = nullptr; 11916 StringRef UniqueDeclName; 11917 LValue IVLVal; 11918 llvm::Function *FoundFn = nullptr; 11919 SourceLocation Loc; 11920 11921 public: 11922 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11923 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11924 llvm::reverse(LPM)) { 11925 auto It = D.DeclToUniqueName.find(E->getDecl()); 11926 if (It == D.DeclToUniqueName.end()) 11927 continue; 11928 if (D.Disabled) 11929 return false; 11930 FoundE = E; 11931 FoundD = E->getDecl()->getCanonicalDecl(); 11932 UniqueDeclName = It->second; 11933 IVLVal = D.IVLVal; 11934 FoundFn = D.Fn; 11935 break; 11936 } 11937 return FoundE == E; 11938 } 11939 bool VisitMemberExpr(const MemberExpr *E) { 11940 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11941 return false; 11942 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 11943 llvm::reverse(LPM)) { 11944 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11945 if (It == D.DeclToUniqueName.end()) 11946 continue; 11947 if (D.Disabled) 11948 return false; 11949 FoundE = E; 11950 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11951 UniqueDeclName = It->second; 11952 IVLVal = D.IVLVal; 11953 FoundFn = D.Fn; 11954 break; 11955 } 11956 return FoundE == E; 11957 } 11958 bool VisitStmt(const Stmt *S) { 11959 for (const Stmt *Child : S->children()) { 11960 if (!Child) 11961 continue; 11962 if (const auto *E = dyn_cast<Expr>(Child)) 11963 if (!E->isGLValue()) 11964 continue; 11965 if (Visit(Child)) 11966 return true; 11967 } 11968 return false; 11969 } 11970 explicit LastprivateConditionalRefChecker( 11971 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11972 : LPM(LPM) {} 11973 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11974 getFoundData() const { 11975 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11976 } 11977 }; 11978 } // namespace 11979 11980 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11981 LValue IVLVal, 11982 StringRef UniqueDeclName, 11983 LValue LVal, 11984 SourceLocation Loc) { 11985 // Last updated loop counter for the lastprivate conditional var. 11986 // int<xx> last_iv = 0; 11987 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11988 llvm::Constant *LastIV = 11989 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 11990 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11991 IVLVal.getAlignment().getAsAlign()); 11992 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11993 11994 // Last value of the lastprivate conditional. 
11995 // decltype(priv_a) last_a; 11996 llvm::Constant *Last = getOrCreateInternalVariable( 11997 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 11998 cast<llvm::GlobalVariable>(Last)->setAlignment( 11999 LVal.getAlignment().getAsAlign()); 12000 LValue LastLVal = 12001 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12002 12003 // Global loop counter. Required to handle inner parallel-for regions. 12004 // iv 12005 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12006 12007 // #pragma omp critical(a) 12008 // if (last_iv <= iv) { 12009 // last_iv = iv; 12010 // last_a = priv_a; 12011 // } 12012 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12013 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12014 Action.Enter(CGF); 12015 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12016 // (last_iv <= iv) ? Check if the variable is updated and store new 12017 // value in global var. 12018 llvm::Value *CmpRes; 12019 if (IVLVal.getType()->isSignedIntegerType()) { 12020 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12021 } else { 12022 assert(IVLVal.getType()->isUnsignedIntegerType() && 12023 "Loop iteration variable must be integer."); 12024 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12025 } 12026 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12027 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12028 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12029 // { 12030 CGF.EmitBlock(ThenBB); 12031 12032 // last_iv = iv; 12033 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12034 12035 // last_a = priv_a; 12036 switch (CGF.getEvaluationKind(LVal.getType())) { 12037 case TEK_Scalar: { 12038 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12039 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12040 break; 12041 } 12042 case TEK_Complex: { 12043 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12044 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 12045 break; 12046 } 12047 case TEK_Aggregate: 12048 llvm_unreachable( 12049 "Aggregates are not supported in lastprivate conditional."); 12050 } 12051 // } 12052 CGF.EmitBranch(ExitBB); 12053 // There is no need to emit line number for unconditional branch. 12054 (void)ApplyDebugLocation::CreateEmpty(CGF); 12055 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12056 }; 12057 12058 if (CGM.getLangOpts().OpenMPSimd) { 12059 // Do not emit as a critical region as no parallel region could be emitted. 12060 RegionCodeGenTy ThenRCG(CodeGen); 12061 ThenRCG(CGF); 12062 } else { 12063 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12064 } 12065 } 12066 12067 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12068 const Expr *LHS) { 12069 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12070 return; 12071 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12072 if (!Checker.Visit(LHS)) 12073 return; 12074 const Expr *FoundE; 12075 const Decl *FoundD; 12076 StringRef UniqueDeclName; 12077 LValue IVLVal; 12078 llvm::Function *FoundFn; 12079 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12080 Checker.getFoundData(); 12081 if (FoundFn != CGF.CurFn) { 12082 // Special codegen for inner parallel regions. 
12083 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12084 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12085 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12086 "Lastprivate conditional is not found in outer region."); 12087 QualType StructTy = std::get<0>(It->getSecond()); 12088 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12089 LValue PrivLVal = CGF.EmitLValue(FoundE); 12090 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12091 PrivLVal.getAddress(CGF), 12092 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12093 LValue BaseLVal = 12094 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12095 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12096 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12097 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12098 FiredLVal, llvm::AtomicOrdering::Unordered, 12099 /*IsVolatile=*/true, /*isInit=*/false); 12100 return; 12101 } 12102 12103 // Private address of the lastprivate conditional in the current context. 
12104 // priv_a 12105 LValue LVal = CGF.EmitLValue(FoundE); 12106 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12107 FoundE->getExprLoc()); 12108 } 12109 12110 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12111 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12112 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12113 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12114 return; 12115 auto Range = llvm::reverse(LastprivateConditionalStack); 12116 auto It = llvm::find_if( 12117 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12118 if (It == Range.end() || It->Fn != CGF.CurFn) 12119 return; 12120 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12121 assert(LPCI != LastprivateConditionalToTypes.end() && 12122 "Lastprivates must be registered already."); 12123 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12124 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12125 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12126 for (const auto &Pair : It->DeclToUniqueName) { 12127 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12128 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12129 continue; 12130 auto I = LPCI->getSecond().find(Pair.first); 12131 assert(I != LPCI->getSecond().end() && 12132 "Lastprivate must be rehistered already."); 12133 // bool Cmp = priv_a.Fired != 0; 12134 LValue BaseLVal = std::get<3>(I->getSecond()); 12135 LValue FiredLVal = 12136 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12137 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12138 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12139 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12140 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12141 // if (Cmp) { 12142 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12143 CGF.EmitBlock(ThenBB); 
12144 Address Addr = CGF.GetAddrOfLocalVar(VD); 12145 LValue LVal; 12146 if (VD->getType()->isReferenceType()) 12147 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12148 AlignmentSource::Decl); 12149 else 12150 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12151 AlignmentSource::Decl); 12152 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12153 D.getBeginLoc()); 12154 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12155 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12156 // } 12157 } 12158 } 12159 12160 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12161 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12162 SourceLocation Loc) { 12163 if (CGF.getLangOpts().OpenMP < 50) 12164 return; 12165 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12166 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12167 "Unknown lastprivate conditional variable."); 12168 StringRef UniqueName = It->second; 12169 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12170 // The variable was not updated in the region - exit. 
12171 if (!GV) 12172 return; 12173 LValue LPLVal = CGF.MakeAddrLValue( 12174 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12175 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12176 CGF.EmitStoreOfScalar(Res, PrivLVal); 12177 } 12178 12179 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12180 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12181 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12182 llvm_unreachable("Not supported in SIMD-only mode"); 12183 } 12184 12185 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12186 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12187 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12188 llvm_unreachable("Not supported in SIMD-only mode"); 12189 } 12190 12191 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12192 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12193 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12194 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12195 bool Tied, unsigned &NumberOfParts) { 12196 llvm_unreachable("Not supported in SIMD-only mode"); 12197 } 12198 12199 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12200 SourceLocation Loc, 12201 llvm::Function *OutlinedFn, 12202 ArrayRef<llvm::Value *> CapturedVars, 12203 const Expr *IfCond) { 12204 llvm_unreachable("Not supported in SIMD-only mode"); 12205 } 12206 12207 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12208 CodeGenFunction &CGF, StringRef CriticalName, 12209 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12210 const Expr *Hint) { 12211 llvm_unreachable("Not supported in SIMD-only mode"); 12212 } 12213 12214 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12215 const RegionCodeGenTy &MasterOpGen, 12216 SourceLocation Loc) { 12217 llvm_unreachable("Not supported in SIMD-only mode"); 12218 } 12219 12220 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12221 SourceLocation Loc) { 12222 llvm_unreachable("Not supported in SIMD-only mode"); 12223 } 12224 12225 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12226 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12227 SourceLocation Loc) { 12228 llvm_unreachable("Not supported in SIMD-only mode"); 12229 } 12230 12231 void CGOpenMPSIMDRuntime::emitSingleRegion( 12232 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12233 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12234 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12235 ArrayRef<const Expr *> AssignmentOps) { 12236 llvm_unreachable("Not supported in SIMD-only mode"); 12237 } 12238 12239 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12240 const RegionCodeGenTy &OrderedOpGen, 12241 SourceLocation Loc, 12242 bool IsThreads) { 12243 llvm_unreachable("Not supported in SIMD-only mode"); 12244 } 12245 12246 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12247 SourceLocation Loc, 12248 OpenMPDirectiveKind Kind, 12249 bool EmitChecks, 12250 bool ForceSimpleCall) { 12251 llvm_unreachable("Not supported in SIMD-only mode"); 12252 } 12253 12254 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12255 CodeGenFunction &CGF, SourceLocation Loc, 12256 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12257 bool Ordered, const DispatchRTInput &DispatchValues) { 12258 llvm_unreachable("Not supported in SIMD-only mode"); 12259 } 12260 12261 void CGOpenMPSIMDRuntime::emitForStaticInit( 12262 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12263 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12264 llvm_unreachable("Not supported in SIMD-only mode"); 12265 } 12266 12267 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12268 CodeGenFunction &CGF, SourceLocation Loc, 12269 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 12270 llvm_unreachable("Not supported in SIMD-only mode"); 12271 } 12272 12273 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12274 SourceLocation Loc, 12275 unsigned IVSize, 12276 bool IVSigned) { 12277 llvm_unreachable("Not supported in SIMD-only mode"); 12278 } 12279 12280 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12281 SourceLocation Loc, 12282 OpenMPDirectiveKind DKind) { 12283 llvm_unreachable("Not supported in SIMD-only mode"); 12284 } 12285 12286 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12287 SourceLocation Loc, 12288 unsigned IVSize, bool IVSigned, 12289 Address IL, Address LB, 12290 Address UB, Address ST) { 12291 llvm_unreachable("Not supported in SIMD-only mode"); 12292 } 12293 12294 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12295 llvm::Value *NumThreads, 12296 SourceLocation Loc) { 12297 llvm_unreachable("Not supported in SIMD-only mode"); 12298 } 12299 12300 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12301 ProcBindKind ProcBind, 12302 SourceLocation Loc) { 12303 llvm_unreachable("Not supported in SIMD-only mode"); 12304 } 12305 12306 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12307 const VarDecl *VD, 12308 Address VDAddr, 12309 SourceLocation Loc) { 12310 llvm_unreachable("Not supported in SIMD-only mode"); 12311 } 12312 12313 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12314 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12315 CodeGenFunction *CGF) { 12316 llvm_unreachable("Not supported in SIMD-only mode"); 12317 } 12318 12319 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12320 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12321 llvm_unreachable("Not supported in SIMD-only mode"); 12322 } 12323 12324 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12325 ArrayRef<const Expr *> 
Vars, 12326 SourceLocation Loc, 12327 llvm::AtomicOrdering AO) { 12328 llvm_unreachable("Not supported in SIMD-only mode"); 12329 } 12330 12331 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12332 const OMPExecutableDirective &D, 12333 llvm::Function *TaskFunction, 12334 QualType SharedsTy, Address Shareds, 12335 const Expr *IfCond, 12336 const OMPTaskDataTy &Data) { 12337 llvm_unreachable("Not supported in SIMD-only mode"); 12338 } 12339 12340 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12341 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12342 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12343 const Expr *IfCond, const OMPTaskDataTy &Data) { 12344 llvm_unreachable("Not supported in SIMD-only mode"); 12345 } 12346 12347 void CGOpenMPSIMDRuntime::emitReduction( 12348 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12349 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12350 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12351 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12352 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12353 ReductionOps, Options); 12354 } 12355 12356 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12357 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12358 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12359 llvm_unreachable("Not supported in SIMD-only mode"); 12360 } 12361 12362 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12363 SourceLocation Loc, 12364 ReductionCodeGen &RCG, 12365 unsigned N) { 12366 llvm_unreachable("Not supported in SIMD-only mode"); 12367 } 12368 12369 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12370 SourceLocation Loc, 12371 llvm::Value *ReductionsPtr, 12372 LValue SharedLVal) { 12373 llvm_unreachable("Not supported in 
SIMD-only mode"); 12374 } 12375 12376 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12377 SourceLocation Loc) { 12378 llvm_unreachable("Not supported in SIMD-only mode"); 12379 } 12380 12381 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12382 CodeGenFunction &CGF, SourceLocation Loc, 12383 OpenMPDirectiveKind CancelRegion) { 12384 llvm_unreachable("Not supported in SIMD-only mode"); 12385 } 12386 12387 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12388 SourceLocation Loc, const Expr *IfCond, 12389 OpenMPDirectiveKind CancelRegion) { 12390 llvm_unreachable("Not supported in SIMD-only mode"); 12391 } 12392 12393 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12394 const OMPExecutableDirective &D, StringRef ParentName, 12395 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12396 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12397 llvm_unreachable("Not supported in SIMD-only mode"); 12398 } 12399 12400 void CGOpenMPSIMDRuntime::emitTargetCall( 12401 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12402 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12403 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12404 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12405 const OMPLoopDirective &D)> 12406 SizeEmitter) { 12407 llvm_unreachable("Not supported in SIMD-only mode"); 12408 } 12409 12410 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12411 llvm_unreachable("Not supported in SIMD-only mode"); 12412 } 12413 12414 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12415 llvm_unreachable("Not supported in SIMD-only mode"); 12416 } 12417 12418 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12419 return false; 12420 } 12421 12422 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12423 const OMPExecutableDirective &D, 12424 SourceLocation Loc, 12425 llvm::Function *OutlinedFn, 12426 
ArrayRef<llvm::Value *> CapturedVars) { 12427 llvm_unreachable("Not supported in SIMD-only mode"); 12428 } 12429 12430 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12431 const Expr *NumTeams, 12432 const Expr *ThreadLimit, 12433 SourceLocation Loc) { 12434 llvm_unreachable("Not supported in SIMD-only mode"); 12435 } 12436 12437 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12438 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12439 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12440 llvm_unreachable("Not supported in SIMD-only mode"); 12441 } 12442 12443 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12444 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12445 const Expr *Device) { 12446 llvm_unreachable("Not supported in SIMD-only mode"); 12447 } 12448 12449 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12450 const OMPLoopDirective &D, 12451 ArrayRef<Expr *> NumIterations) { 12452 llvm_unreachable("Not supported in SIMD-only mode"); 12453 } 12454 12455 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12456 const OMPDependClause *C) { 12457 llvm_unreachable("Not supported in SIMD-only mode"); 12458 } 12459 12460 const VarDecl * 12461 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12462 const VarDecl *NativeParam) const { 12463 llvm_unreachable("Not supported in SIMD-only mode"); 12464 } 12465 12466 Address 12467 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12468 const VarDecl *NativeParam, 12469 const VarDecl *TargetParam) const { 12470 llvm_unreachable("Not supported in SIMD-only mode"); 12471 } 12472