1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/FileManager.h" 25 #include "clang/Basic/OpenMPKinds.h" 26 #include "clang/Basic/SourceManager.h" 27 #include "clang/CodeGen/ConstantInitBuilder.h" 28 #include "llvm/ADT/ArrayRef.h" 29 #include "llvm/ADT/SetOperations.h" 30 #include "llvm/ADT/StringExtras.h" 31 #include "llvm/Bitcode/BitcodeReader.h" 32 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 33 #include "llvm/IR/Constants.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/GlobalValue.h" 36 #include "llvm/IR/Value.h" 37 #include "llvm/Support/AtomicOrdering.h" 38 #include "llvm/Support/Format.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <cassert> 41 42 using namespace clang; 43 using namespace CodeGen; 44 using namespace llvm::omp; 45 46 namespace { 47 /// Base class for handling code generation inside OpenMP regions. 48 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 49 public: 50 /// Kinds of OpenMP regions used in codegen. 51 enum CGOpenMPRegionKind { 52 /// Region with outlined function for standalone 'parallel' 53 /// directive. 
54 ParallelOutlinedRegion, 55 /// Region with outlined function for standalone 'task' directive. 56 TaskOutlinedRegion, 57 /// Region for constructs that do not require function outlining, 58 /// like 'for', 'sections', 'atomic' etc. directives. 59 InlinedRegion, 60 /// Region with outlined function for standalone 'target' directive. 61 TargetRegion, 62 }; 63 64 CGOpenMPRegionInfo(const CapturedStmt &CS, 65 const CGOpenMPRegionKind RegionKind, 66 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 67 bool HasCancel) 68 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 69 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 70 71 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 72 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 73 bool HasCancel) 74 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 75 Kind(Kind), HasCancel(HasCancel) {} 76 77 /// Get a variable or parameter for storing global thread id 78 /// inside OpenMP construct. 79 virtual const VarDecl *getThreadIDVariable() const = 0; 80 81 /// Emit the captured statement body. 82 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 83 84 /// Get an LValue for the current ThreadID variable. 85 /// \return LValue for thread id variable. This LValue always has type int32*. 
86 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 87 88 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 89 90 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 91 92 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 93 94 bool hasCancel() const { return HasCancel; } 95 96 static bool classof(const CGCapturedStmtInfo *Info) { 97 return Info->getKind() == CR_OpenMP; 98 } 99 100 ~CGOpenMPRegionInfo() override = default; 101 102 protected: 103 CGOpenMPRegionKind RegionKind; 104 RegionCodeGenTy CodeGen; 105 OpenMPDirectiveKind Kind; 106 bool HasCancel; 107 }; 108 109 /// API for captured statement code generation in OpenMP constructs. 110 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 111 public: 112 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 113 const RegionCodeGenTy &CodeGen, 114 OpenMPDirectiveKind Kind, bool HasCancel, 115 StringRef HelperName) 116 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 117 HasCancel), 118 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 119 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 120 } 121 122 /// Get a variable or parameter for storing global thread id 123 /// inside OpenMP construct. 124 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 125 126 /// Get the name of the capture helper. 127 StringRef getHelperName() const override { return HelperName; } 128 129 static bool classof(const CGCapturedStmtInfo *Info) { 130 return CGOpenMPRegionInfo::classof(Info) && 131 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 132 ParallelOutlinedRegion; 133 } 134 135 private: 136 /// A variable or parameter storing global thread id for OpenMP 137 /// constructs. 138 const VarDecl *ThreadIDVar; 139 StringRef HelperName; 140 }; 141 142 /// API for captured statement code generation in OpenMP constructs. 
143 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 144 public: 145 class UntiedTaskActionTy final : public PrePostActionTy { 146 bool Untied; 147 const VarDecl *PartIDVar; 148 const RegionCodeGenTy UntiedCodeGen; 149 llvm::SwitchInst *UntiedSwitch = nullptr; 150 151 public: 152 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 153 const RegionCodeGenTy &UntiedCodeGen) 154 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 155 void Enter(CodeGenFunction &CGF) override { 156 if (Untied) { 157 // Emit task switching point. 158 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 159 CGF.GetAddrOfLocalVar(PartIDVar), 160 PartIDVar->getType()->castAs<PointerType>()); 161 llvm::Value *Res = 162 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 163 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 164 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 165 CGF.EmitBlock(DoneBB); 166 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 167 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 168 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 169 CGF.Builder.GetInsertBlock()); 170 emitUntiedSwitch(CGF); 171 } 172 } 173 void emitUntiedSwitch(CodeGenFunction &CGF) const { 174 if (Untied) { 175 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 176 CGF.GetAddrOfLocalVar(PartIDVar), 177 PartIDVar->getType()->castAs<PointerType>()); 178 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 179 PartIdLVal); 180 UntiedCodeGen(CGF); 181 CodeGenFunction::JumpDest CurPoint = 182 CGF.getJumpDestInCurrentScope(".untied.next."); 183 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 184 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 185 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 186 CGF.Builder.GetInsertBlock()); 187 CGF.EmitBranchThroughCleanup(CurPoint); 188 CGF.EmitBlock(CurPoint.getBlock()); 189 } 190 } 191 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 192 }; 193 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 194 const VarDecl *ThreadIDVar, 195 const RegionCodeGenTy &CodeGen, 196 OpenMPDirectiveKind Kind, bool HasCancel, 197 const UntiedTaskActionTy &Action) 198 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 199 ThreadIDVar(ThreadIDVar), Action(Action) { 200 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 201 } 202 203 /// Get a variable or parameter for storing global thread id 204 /// inside OpenMP construct. 205 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 206 207 /// Get an LValue for the current ThreadID variable. 208 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 209 210 /// Get the name of the capture helper. 211 StringRef getHelperName() const override { return ".omp_outlined."; } 212 213 void emitUntiedSwitch(CodeGenFunction &CGF) override { 214 Action.emitUntiedSwitch(CGF); 215 } 216 217 static bool classof(const CGCapturedStmtInfo *Info) { 218 return CGOpenMPRegionInfo::classof(Info) && 219 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 220 TaskOutlinedRegion; 221 } 222 223 private: 224 /// A variable or parameter storing global thread id for OpenMP 225 /// constructs. 226 const VarDecl *ThreadIDVar; 227 /// Action for emitting code for untied tasks. 228 const UntiedTaskActionTy &Action; 229 }; 230 231 /// API for inlined captured statement code generation in OpenMP 232 /// constructs. 233 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 234 public: 235 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 236 const RegionCodeGenTy &CodeGen, 237 OpenMPDirectiveKind Kind, bool HasCancel) 238 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 239 OldCSI(OldCSI), 240 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 241 242 // Retrieve the value of the context parameter. 
243 llvm::Value *getContextValue() const override { 244 if (OuterRegionInfo) 245 return OuterRegionInfo->getContextValue(); 246 llvm_unreachable("No context value for inlined OpenMP region"); 247 } 248 249 void setContextValue(llvm::Value *V) override { 250 if (OuterRegionInfo) { 251 OuterRegionInfo->setContextValue(V); 252 return; 253 } 254 llvm_unreachable("No context value for inlined OpenMP region"); 255 } 256 257 /// Lookup the captured field decl for a variable. 258 const FieldDecl *lookup(const VarDecl *VD) const override { 259 if (OuterRegionInfo) 260 return OuterRegionInfo->lookup(VD); 261 // If there is no outer outlined region,no need to lookup in a list of 262 // captured variables, we can use the original one. 263 return nullptr; 264 } 265 266 FieldDecl *getThisFieldDecl() const override { 267 if (OuterRegionInfo) 268 return OuterRegionInfo->getThisFieldDecl(); 269 return nullptr; 270 } 271 272 /// Get a variable or parameter for storing global thread id 273 /// inside OpenMP construct. 274 const VarDecl *getThreadIDVariable() const override { 275 if (OuterRegionInfo) 276 return OuterRegionInfo->getThreadIDVariable(); 277 return nullptr; 278 } 279 280 /// Get an LValue for the current ThreadID variable. 281 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 282 if (OuterRegionInfo) 283 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 284 llvm_unreachable("No LValue for inlined OpenMP construct"); 285 } 286 287 /// Get the name of the capture helper. 
288 StringRef getHelperName() const override { 289 if (auto *OuterRegionInfo = getOldCSI()) 290 return OuterRegionInfo->getHelperName(); 291 llvm_unreachable("No helper name for inlined OpenMP construct"); 292 } 293 294 void emitUntiedSwitch(CodeGenFunction &CGF) override { 295 if (OuterRegionInfo) 296 OuterRegionInfo->emitUntiedSwitch(CGF); 297 } 298 299 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 300 301 static bool classof(const CGCapturedStmtInfo *Info) { 302 return CGOpenMPRegionInfo::classof(Info) && 303 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 304 } 305 306 ~CGOpenMPInlinedRegionInfo() override = default; 307 308 private: 309 /// CodeGen info about outer OpenMP region. 310 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 311 CGOpenMPRegionInfo *OuterRegionInfo; 312 }; 313 314 /// API for captured statement code generation in OpenMP target 315 /// constructs. For this captures, implicit parameters are used instead of the 316 /// captured fields. The name of the target region has to be unique in a given 317 /// application so it is provided by the client, because only the client has 318 /// the information to generate that. 319 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 320 public: 321 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 322 const RegionCodeGenTy &CodeGen, StringRef HelperName) 323 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 324 /*HasCancel=*/false), 325 HelperName(HelperName) {} 326 327 /// This is unused for target regions because each starts executing 328 /// with a single thread. 329 const VarDecl *getThreadIDVariable() const override { return nullptr; } 330 331 /// Get the name of the capture helper. 
332 StringRef getHelperName() const override { return HelperName; } 333 334 static bool classof(const CGCapturedStmtInfo *Info) { 335 return CGOpenMPRegionInfo::classof(Info) && 336 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 337 } 338 339 private: 340 StringRef HelperName; 341 }; 342 343 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 344 llvm_unreachable("No codegen for expressions"); 345 } 346 /// API for generation of expressions captured in a innermost OpenMP 347 /// region. 348 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 349 public: 350 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 351 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 352 OMPD_unknown, 353 /*HasCancel=*/false), 354 PrivScope(CGF) { 355 // Make sure the globals captured in the provided statement are local by 356 // using the privatization logic. We assume the same variable is not 357 // captured more than once. 358 for (const auto &C : CS.captures()) { 359 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 360 continue; 361 362 const VarDecl *VD = C.getCapturedVar(); 363 if (VD->isLocalVarDeclOrParm()) 364 continue; 365 366 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 367 /*RefersToEnclosingVariableOrCapture=*/false, 368 VD->getType().getNonReferenceType(), VK_LValue, 369 C.getLocation()); 370 PrivScope.addPrivate( 371 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 372 } 373 (void)PrivScope.Privatize(); 374 } 375 376 /// Lookup the captured field decl for a variable. 377 const FieldDecl *lookup(const VarDecl *VD) const override { 378 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 379 return FD; 380 return nullptr; 381 } 382 383 /// Emit the captured statement body. 
384 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 385 llvm_unreachable("No body for expressions"); 386 } 387 388 /// Get a variable or parameter for storing global thread id 389 /// inside OpenMP construct. 390 const VarDecl *getThreadIDVariable() const override { 391 llvm_unreachable("No thread id for expressions"); 392 } 393 394 /// Get the name of the capture helper. 395 StringRef getHelperName() const override { 396 llvm_unreachable("No helper name for expressions"); 397 } 398 399 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 400 401 private: 402 /// Private scope to capture global variables. 403 CodeGenFunction::OMPPrivateScope PrivScope; 404 }; 405 406 /// RAII for emitting code of OpenMP constructs. 407 class InlinedOpenMPRegionRAII { 408 CodeGenFunction &CGF; 409 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 410 FieldDecl *LambdaThisCaptureField = nullptr; 411 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 412 413 public: 414 /// Constructs region for combined constructs. 415 /// \param CodeGen Code generation sequence for combined directives. Includes 416 /// a list of functions used for code generation of implicitly inlined 417 /// regions. 418 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 419 OpenMPDirectiveKind Kind, bool HasCancel) 420 : CGF(CGF) { 421 // Start emission for the construct. 422 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 423 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 424 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 425 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 426 CGF.LambdaThisCaptureField = nullptr; 427 BlockInfo = CGF.BlockInfo; 428 CGF.BlockInfo = nullptr; 429 } 430 431 ~InlinedOpenMPRegionRAII() { 432 // Restore original CapturedStmtInfo only if we're done with code emission. 
433 auto *OldCSI = 434 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 435 delete CGF.CapturedStmtInfo; 436 CGF.CapturedStmtInfo = OldCSI; 437 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 438 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 439 CGF.BlockInfo = BlockInfo; 440 } 441 }; 442 443 /// Values for bit flags used in the ident_t to describe the fields. 444 /// All enumeric elements are named and described in accordance with the code 445 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 446 enum OpenMPLocationFlags : unsigned { 447 /// Use trampoline for internal microtask. 448 OMP_IDENT_IMD = 0x01, 449 /// Use c-style ident structure. 450 OMP_IDENT_KMPC = 0x02, 451 /// Atomic reduction option for kmpc_reduce. 452 OMP_ATOMIC_REDUCE = 0x10, 453 /// Explicit 'barrier' directive. 454 OMP_IDENT_BARRIER_EXPL = 0x20, 455 /// Implicit barrier in code. 456 OMP_IDENT_BARRIER_IMPL = 0x40, 457 /// Implicit barrier in 'for' directive. 458 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 459 /// Implicit barrier in 'sections' directive. 460 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 461 /// Implicit barrier in 'single' directive. 462 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 463 /// Call of __kmp_for_static_init for static loop. 464 OMP_IDENT_WORK_LOOP = 0x200, 465 /// Call of __kmp_for_static_init for sections. 466 OMP_IDENT_WORK_SECTIONS = 0x400, 467 /// Call of __kmp_for_static_init for distribute. 468 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 469 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 470 }; 471 472 namespace { 473 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 474 /// Values for bit flags for marking which requires clauses have been used. 475 enum OpenMPOffloadingRequiresDirFlags : int64_t { 476 /// flag undefined. 477 OMP_REQ_UNDEFINED = 0x000, 478 /// no requires clause present. 479 OMP_REQ_NONE = 0x001, 480 /// reverse_offload clause. 
481 OMP_REQ_REVERSE_OFFLOAD = 0x002, 482 /// unified_address clause. 483 OMP_REQ_UNIFIED_ADDRESS = 0x004, 484 /// unified_shared_memory clause. 485 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 486 /// dynamic_allocators clause. 487 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 488 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 489 }; 490 491 enum OpenMPOffloadingReservedDeviceIDs { 492 /// Device ID if the device was not defined, runtime should get it 493 /// from environment variables in the spec. 494 OMP_DEVICEID_UNDEF = -1, 495 }; 496 } // anonymous namespace 497 498 /// Describes ident structure that describes a source location. 499 /// All descriptions are taken from 500 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 501 /// Original structure: 502 /// typedef struct ident { 503 /// kmp_int32 reserved_1; /**< might be used in Fortran; 504 /// see above */ 505 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 506 /// KMP_IDENT_KMPC identifies this union 507 /// member */ 508 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 509 /// see above */ 510 ///#if USE_ITT_BUILD 511 /// /* but currently used for storing 512 /// region-specific ITT */ 513 /// /* contextual information. */ 514 ///#endif /* USE_ITT_BUILD */ 515 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 516 /// C++ */ 517 /// char const *psource; /**< String describing the source location. 518 /// The string is composed of semi-colon separated 519 // fields which describe the source file, 520 /// the function and a pair of line numbers that 521 /// delimit the construct. 522 /// */ 523 /// } ident_t; 524 enum IdentFieldIndex { 525 /// might be used in Fortran 526 IdentField_Reserved_1, 527 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
528 IdentField_Flags, 529 /// Not really used in Fortran any more 530 IdentField_Reserved_2, 531 /// Source[4] in Fortran, do not use for C++ 532 IdentField_Reserved_3, 533 /// String describing the source location. The string is composed of 534 /// semi-colon separated fields which describe the source file, the function 535 /// and a pair of line numbers that delimit the construct. 536 IdentField_PSource 537 }; 538 539 /// Schedule types for 'omp for' loops (these enumerators are taken from 540 /// the enum sched_type in kmp.h). 541 enum OpenMPSchedType { 542 /// Lower bound for default (unordered) versions. 543 OMP_sch_lower = 32, 544 OMP_sch_static_chunked = 33, 545 OMP_sch_static = 34, 546 OMP_sch_dynamic_chunked = 35, 547 OMP_sch_guided_chunked = 36, 548 OMP_sch_runtime = 37, 549 OMP_sch_auto = 38, 550 /// static with chunk adjustment (e.g., simd) 551 OMP_sch_static_balanced_chunked = 45, 552 /// Lower bound for 'ordered' versions. 553 OMP_ord_lower = 64, 554 OMP_ord_static_chunked = 65, 555 OMP_ord_static = 66, 556 OMP_ord_dynamic_chunked = 67, 557 OMP_ord_guided_chunked = 68, 558 OMP_ord_runtime = 69, 559 OMP_ord_auto = 70, 560 OMP_sch_default = OMP_sch_static, 561 /// dist_schedule types 562 OMP_dist_sch_static_chunked = 91, 563 OMP_dist_sch_static = 92, 564 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 565 /// Set if the monotonic schedule modifier was present. 566 OMP_sch_modifier_monotonic = (1 << 29), 567 /// Set if the nonmonotonic schedule modifier was present. 
568 OMP_sch_modifier_nonmonotonic = (1 << 30), 569 }; 570 571 enum OpenMPRTLFunction { 572 /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 573 /// kmpc_micro microtask, ...); 574 OMPRTL__kmpc_fork_call, 575 /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, 576 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 577 OMPRTL__kmpc_threadprivate_cached, 578 /// Call to void __kmpc_threadprivate_register( ident_t *, 579 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 580 OMPRTL__kmpc_threadprivate_register, 581 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 582 OMPRTL__kmpc_global_thread_num, 583 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 584 // kmp_critical_name *crit); 585 OMPRTL__kmpc_critical, 586 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 587 // global_tid, kmp_critical_name *crit, uintptr_t hint); 588 OMPRTL__kmpc_critical_with_hint, 589 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 590 // kmp_critical_name *crit); 591 OMPRTL__kmpc_end_critical, 592 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 593 // global_tid); 594 OMPRTL__kmpc_cancel_barrier, 595 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 596 OMPRTL__kmpc_barrier, 597 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 598 OMPRTL__kmpc_for_static_fini, 599 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 600 // global_tid); 601 OMPRTL__kmpc_serialized_parallel, 602 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 603 // global_tid); 604 OMPRTL__kmpc_end_serialized_parallel, 605 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 606 // kmp_int32 num_threads); 607 OMPRTL__kmpc_push_num_threads, 608 // Call to void __kmpc_flush(ident_t *loc); 609 OMPRTL__kmpc_flush, 610 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 
global_tid); 611 OMPRTL__kmpc_master, 612 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 613 OMPRTL__kmpc_end_master, 614 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 615 // int end_part); 616 OMPRTL__kmpc_omp_taskyield, 617 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 618 OMPRTL__kmpc_single, 619 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 620 OMPRTL__kmpc_end_single, 621 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 622 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 623 // kmp_routine_entry_t *task_entry); 624 OMPRTL__kmpc_omp_task_alloc, 625 // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *, 626 // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, 627 // size_t sizeof_shareds, kmp_routine_entry_t *task_entry, 628 // kmp_int64 device_id); 629 OMPRTL__kmpc_omp_target_task_alloc, 630 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 631 // new_task); 632 OMPRTL__kmpc_omp_task, 633 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 634 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 635 // kmp_int32 didit); 636 OMPRTL__kmpc_copyprivate, 637 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 638 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 639 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 640 OMPRTL__kmpc_reduce, 641 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 642 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 643 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 644 // *lck); 645 OMPRTL__kmpc_reduce_nowait, 646 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 647 // kmp_critical_name *lck); 648 OMPRTL__kmpc_end_reduce, 649 // Call to void 
__kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 650 // kmp_critical_name *lck); 651 OMPRTL__kmpc_end_reduce_nowait, 652 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 653 // kmp_task_t * new_task); 654 OMPRTL__kmpc_omp_task_begin_if0, 655 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 656 // kmp_task_t * new_task); 657 OMPRTL__kmpc_omp_task_complete_if0, 658 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 659 OMPRTL__kmpc_ordered, 660 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 661 OMPRTL__kmpc_end_ordered, 662 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 663 // global_tid); 664 OMPRTL__kmpc_omp_taskwait, 665 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 666 OMPRTL__kmpc_taskgroup, 667 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 668 OMPRTL__kmpc_end_taskgroup, 669 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 670 // int proc_bind); 671 OMPRTL__kmpc_push_proc_bind, 672 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 673 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 674 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 675 OMPRTL__kmpc_omp_task_with_deps, 676 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 677 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 678 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 679 OMPRTL__kmpc_omp_wait_deps, 680 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 681 // global_tid, kmp_int32 cncl_kind); 682 OMPRTL__kmpc_cancellationpoint, 683 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 684 // kmp_int32 cncl_kind); 685 OMPRTL__kmpc_cancel, 686 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 687 // kmp_int32 num_teams, kmp_int32 thread_limit); 688 
OMPRTL__kmpc_push_num_teams, 689 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 690 // microtask, ...); 691 OMPRTL__kmpc_fork_teams, 692 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 693 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 694 // sched, kmp_uint64 grainsize, void *task_dup); 695 OMPRTL__kmpc_taskloop, 696 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 697 // num_dims, struct kmp_dim *dims); 698 OMPRTL__kmpc_doacross_init, 699 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 700 OMPRTL__kmpc_doacross_fini, 701 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 702 // *vec); 703 OMPRTL__kmpc_doacross_post, 704 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 705 // *vec); 706 OMPRTL__kmpc_doacross_wait, 707 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 708 // *data); 709 OMPRTL__kmpc_task_reduction_init, 710 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 711 // *d); 712 OMPRTL__kmpc_task_reduction_get_th_data, 713 // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); 714 OMPRTL__kmpc_alloc, 715 // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); 716 OMPRTL__kmpc_free, 717 718 // 719 // Offloading related calls 720 // 721 // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 722 // size); 723 OMPRTL__kmpc_push_target_tripcount, 724 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 725 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 726 // *arg_types); 727 OMPRTL__tgt_target, 728 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 729 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 730 // *arg_types); 731 OMPRTL__tgt_target_nowait, 732 // Call to 
int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 733 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 734 // *arg_types, int32_t num_teams, int32_t thread_limit); 735 OMPRTL__tgt_target_teams, 736 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void 737 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 738 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 739 OMPRTL__tgt_target_teams_nowait, 740 // Call to void __tgt_register_requires(int64_t flags); 741 OMPRTL__tgt_register_requires, 742 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 743 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 744 OMPRTL__tgt_target_data_begin, 745 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 746 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 747 // *arg_types); 748 OMPRTL__tgt_target_data_begin_nowait, 749 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 750 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 751 OMPRTL__tgt_target_data_end, 752 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 753 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 754 // *arg_types); 755 OMPRTL__tgt_target_data_end_nowait, 756 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 757 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 758 OMPRTL__tgt_target_data_update, 759 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 760 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 761 // *arg_types); 762 OMPRTL__tgt_target_data_update_nowait, 763 // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 764 OMPRTL__tgt_mapper_num_components, 765 // Call to void __tgt_push_mapper_component(void 
*rt_mapper_handle, void 766 // *base, void *begin, int64_t size, int64_t type); 767 OMPRTL__tgt_push_mapper_component, 768 // Call to kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 769 // int gtid, kmp_task_t *task); 770 OMPRTL__kmpc_task_allow_completion_event, 771 }; 772 773 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 774 /// region. 775 class CleanupTy final : public EHScopeStack::Cleanup { 776 PrePostActionTy *Action; 777 778 public: 779 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 780 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 781 if (!CGF.HaveInsertPoint()) 782 return; 783 Action->Exit(CGF); 784 } 785 }; 786 787 } // anonymous namespace 788 789 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 790 CodeGenFunction::RunCleanupsScope Scope(CGF); 791 if (PrePostAction) { 792 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 793 Callback(CodeGen, CGF, *PrePostAction); 794 } else { 795 PrePostActionTy Action; 796 Callback(CodeGen, CGF, Action); 797 } 798 } 799 800 /// Check if the combiner is a call to UDR combiner and if it is so return the 801 /// UDR decl used for reduction. 
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // Expected shape: a call whose callee is an opaque value wrapping (modulo
  // implicit casts) a reference to an OMPDeclareReductionDecl. Any other
  // shape yields nullptr.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit initialization of the private reduction copy located at \p Private.
///
/// If \p DRD has an initializer, \p InitOp (which must be a call through an
/// opaque-value callee with two '&var'-like unary-operator arguments) is
/// emitted with the first argument's variable remapped to \p Private, the
/// second argument's variable remapped to \p Original, and the callee bound
/// to the second function of the pair returned by getUserDefinedReduction().
/// Otherwise \p Private is initialized from a private constant global that
/// holds the null constant of \p Ty.
///
/// \param DRD Declare-reduction declaration; must not be null.
/// \param InitOp Reduction operation expression used for the initializer call.
/// \param Private Address of the private copy to initialize.
/// \param Original Address of the original (shared) item.
/// \param Ty Type of the item being initialized.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Pair is (combiner, initializer); only the second member is used here.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect the variables named in the call arguments to the actual
    // private/original storage for the duration of the emission.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the generated initializer function.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No user initializer: copy a null constant of type Ty into Private.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Form the rvalue according to how values of type Ty are evaluated.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Route the rvalue through a temporary opaque value expression so it can
    // be stored with EmitAnyExprToMem.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit element-by-element initialization of an array.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted into every
/// element.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, may be null. When non-null, the
/// source array at \p SrcAddr is traversed in lock step with the destination.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop: the body is skipped entirely
  // when the array is empty, otherwise it repeats until DestEnd is reached.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current element pointers; the second incoming value
  // of each is added after the body has been emitted.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run per element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the source increment reuses the "dest.element" value
    // name; this looks like a copy/paste of the destination increment below.
    // Renaming would change the emitted IR value names, so it is left as is.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the shared lvalue for \p E; forwards to
/// CodeGenFunction::EmitOMPSharedLValue().
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit the second ("UB") lvalue for \p E. For an array section this is the
/// section emitted with IsLowerBound=false (presumably its last element —
/// TODO confirm against EmitOMPArraySectionExpr); for every other expression
/// a default-constructed LValue is returned.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Initialize the private array copy of reduction item \p N element by
/// element. The declare-reduction path is taken when \p DRD is non-null and
/// either has an initializer or the private variable has none; otherwise the
/// private variable's own initializer is emitted for each element.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

/// Record one ClausesData entry per shared expression, pairing it with the
/// private expression and reduction operation at the same position.
/// \p Privates and \p ReductionOps are advanced once per element of
/// \p Shareds; their lengths are not checked here, so they are assumed to be
/// at least as long as \p Shareds.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit and record the (first, UB) lvalue pair for reduction item \p N.
/// Items must be processed in order: exactly \p N pairs must already have
/// been recorded.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

/// Compute and record the size of reduction item \p N in Sizes, and, for
/// variably modified private types, bind the computed element count to the
/// variable array type's size expression and emit the type.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size item: record the type size only; the element count slot
    // stays null.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB pointer - first pointer) + 1; byte size follows
    // from multiplying by the element size.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: derive the element count from the type size.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Variant of emitAggregateType() that uses an already known element count
/// \p Size instead of computing one. For non-variably-modified private types
/// nothing is emitted and \p Size is expected to be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item \p N.
///
/// Arrays go through emitAggregateInitialization(). Non-array items use the
/// declare-reduction initializer when one applies; otherwise, if
/// \p DefaultInit returns false and the private variable has a non-trivial
/// initializer, that initializer is emitted into \p PrivateAddr.
/// \param PrivateAddr Address of the private copy.
/// \param SharedLVal LValue of the shared item; it is re-typed to the type
/// recorded for item \p N before use.
/// \param DefaultInit Callback for default initialization; its return value
/// tells whether the initialization was handled.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private type of reduction item \p N has a destruction
/// kind other than DK_none.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the private copy of reduction item \p N at
/// \p PrivateAddr, if the item needs cleanups at all.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Starting from \p BaseLV of type \p BaseTy, load through successive pointer
/// and reference levels until the type is neither a pointer nor a reference,
/// or it is the same type as \p ElTy. The resulting address is returned as an
/// lvalue with its element type cast to the memory type of \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Wrap \p Addr so that it can be used in place of a base of type \p BaseTy.
///
/// For every pointer/reference level of \p BaseTy (walked with the same
/// condition as loadToBegin()) a memory temporary is created and each
/// temporary's pointer is stored into the previous one. \p Addr, cast to the
/// element type of the innermost temporary, is stored there and the outermost
/// temporary is returned. If there are no such levels, \p Addr is cast to
/// \p BaseLVType and returned with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // Innermost temporary so far.
  Address TopTmp = Address::invalid();     // Temporary of the previous level.
  Address MostTopTmp = Address::invalid(); // Outermost temporary (result).
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Find the variable underlying an array section or array subscript
/// expression.
/// \param Ref Reduction item expression.
/// \param [out] DE Set to the DeclRefExpr of the base variable when \p Ref is
/// an array section or array subscript; left untouched otherwise.
/// \return The base variable declaration, or null if \p Ref is neither an
/// array section nor an array subscript expression.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    // Strip nested array sections first, then nested subscripts.
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Record the base declaration of reduction item \p N and return the address
/// to use for its private copy.
///
/// For array sections and subscripts the private pointer is offset by the
/// pointer difference between the beginning of the original base and the
/// shared item, and then wrapped by castToBase(). For any other item the
/// referenced variable is recorded and \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // Only assigned (and only read) when getBaseDecl() returns non-null.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Return true if reduction item \p N uses a declare-reduction declaration
/// that has an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1216 return CGF.EmitLoadOfPointerLValue( 1217 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1218 getThreadIDVariable()->getType()->castAs<PointerType>()); 1219 } 1220 1221 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1222 if (!CGF.HaveInsertPoint()) 1223 return; 1224 // 1.2.2 OpenMP Language Terminology 1225 // Structured block - An executable statement with a single entry at the 1226 // top and a single exit at the bottom. 1227 // The point of exit cannot be a branch out of the structured block. 1228 // longjmp() and throw() must not violate the entry/exit criteria. 1229 CGF.EHStack.pushTerminate(); 1230 CodeGen(CGF); 1231 CGF.EHStack.popTerminate(); 1232 } 1233 1234 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1235 CodeGenFunction &CGF) { 1236 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1237 getThreadIDVariable()->getType(), 1238 AlignmentSource::Decl); 1239 } 1240 1241 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1242 QualType FieldTy) { 1243 auto *Field = FieldDecl::Create( 1244 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1245 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1246 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1247 Field->setAccess(AS_public); 1248 DC->addDecl(Field); 1249 return Field; 1250 } 1251 1252 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1253 StringRef Separator) 1254 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1255 OffloadEntriesInfoManager(CGM) { 1256 ASTContext &C = CGM.getContext(); 1257 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1258 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1259 RD->startDefinition(); 1260 // reserved_1 1261 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1262 // flags 1263 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 1264 // reserved_2 1265 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1266 // reserved_3 1267 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1268 // psource 1269 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1270 RD->completeDefinition(); 1271 IdentQTy = C.getRecordType(RD); 1272 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1273 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1274 1275 loadOffloadInfoMetadata(); 1276 } 1277 1278 void CGOpenMPRuntime::clear() { 1279 InternalVars.clear(); 1280 // Clean non-target variable declarations possibly used only in debug info. 1281 for (const auto &Data : EmittedNonTargetVariables) { 1282 if (!Data.getValue().pointsToAliveValue()) 1283 continue; 1284 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1285 if (!GV) 1286 continue; 1287 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1288 continue; 1289 GV->eraseFromParent(); 1290 } 1291 } 1292 1293 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1294 SmallString<128> Buffer; 1295 llvm::raw_svector_ostream OS(Buffer); 1296 StringRef Sep = FirstSeparator; 1297 for (StringRef Part : Parts) { 1298 OS << Sep << Part; 1299 Sep = Separator; 1300 } 1301 return std::string(OS.str()); 1302 } 1303 1304 static llvm::Function * 1305 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1306 const Expr *CombinerInitializer, const VarDecl *In, 1307 const VarDecl *Out, bool IsCombiner) { 1308 // void .omp_combiner.(Ty *in, Ty *out); 1309 ASTContext &C = CGM.getContext(); 1310 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1311 FunctionArgList Args; 1312 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1313 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1314 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1315 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1316 Args.push_back(&OmpOutParm); 1317 Args.push_back(&OmpInParm); 1318 const CGFunctionInfo 
&FnInfo = 1319 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1320 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1321 std::string Name = CGM.getOpenMPRuntime().getName( 1322 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1323 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1324 Name, &CGM.getModule()); 1325 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1326 if (CGM.getLangOpts().Optimize) { 1327 Fn->removeFnAttr(llvm::Attribute::NoInline); 1328 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1329 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1330 } 1331 CodeGenFunction CGF(CGM); 1332 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1333 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1334 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1335 Out->getLocation()); 1336 CodeGenFunction::OMPPrivateScope Scope(CGF); 1337 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1338 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1339 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1340 .getAddress(CGF); 1341 }); 1342 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1343 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1344 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1345 .getAddress(CGF); 1346 }); 1347 (void)Scope.Privatize(); 1348 if (!IsCombiner && Out->hasInit() && 1349 !CGF.isTrivialInitializer(Out->getInit())) { 1350 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1351 Out->getType().getQualifiers(), 1352 /*IsInitializer=*/true); 1353 } 1354 if (CombinerInitializer) 1355 CGF.EmitIgnoredExpr(CombinerInitializer); 1356 Scope.ForceCleanup(); 1357 CGF.FinishFunction(); 1358 return Fn; 1359 } 1360 1361 void CGOpenMPRuntime::emitUserDefinedReduction( 1362 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1363 if 
(UDRMap.count(D) > 0) 1364 return; 1365 llvm::Function *Combiner = emitCombinerOrInitializer( 1366 CGM, D->getType(), D->getCombiner(), 1367 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1368 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1369 /*IsCombiner=*/true); 1370 llvm::Function *Initializer = nullptr; 1371 if (const Expr *Init = D->getInitializer()) { 1372 Initializer = emitCombinerOrInitializer( 1373 CGM, D->getType(), 1374 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1375 : nullptr, 1376 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1377 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1378 /*IsCombiner=*/false); 1379 } 1380 UDRMap.try_emplace(D, Combiner, Initializer); 1381 if (CGF) { 1382 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1383 Decls.second.push_back(D); 1384 } 1385 } 1386 1387 std::pair<llvm::Function *, llvm::Function *> 1388 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1389 auto I = UDRMap.find(D); 1390 if (I != UDRMap.end()) 1391 return I->second; 1392 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1393 return UDRMap.lookup(D); 1394 } 1395 1396 namespace { 1397 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1398 // Builder if one is present. 1399 struct PushAndPopStackRAII { 1400 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1401 bool HasCancel) 1402 : OMPBuilder(OMPBuilder) { 1403 if (!OMPBuilder) 1404 return; 1405 1406 // The following callback is the crucial part of clangs cleanup process. 1407 // 1408 // NOTE: 1409 // Once the OpenMPIRBuilder is used to create parallel regions (and 1410 // similar), the cancellation destination (Dest below) is determined via 1411 // IP. 
That means if we have variables to finalize we split the block at IP, 1412 // use the new block (=BB) as destination to build a JumpDest (via 1413 // getJumpDestInCurrentScope(BB)) which then is fed to 1414 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1415 // to push & pop an FinalizationInfo object. 1416 // The FiniCB will still be needed but at the point where the 1417 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1418 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1419 assert(IP.getBlock()->end() == IP.getPoint() && 1420 "Clang CG should cause non-terminated block!"); 1421 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1422 CGF.Builder.restoreIP(IP); 1423 CodeGenFunction::JumpDest Dest = 1424 CGF.getOMPCancelDestination(OMPD_parallel); 1425 CGF.EmitBranchThroughCleanup(Dest); 1426 }; 1427 1428 // TODO: Remove this once we emit parallel regions through the 1429 // OpenMPIRBuilder as it can do this setup internally. 
1430 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1431 {FiniCB, OMPD_parallel, HasCancel}); 1432 OMPBuilder->pushFinalizationCB(std::move(FI)); 1433 } 1434 ~PushAndPopStackRAII() { 1435 if (OMPBuilder) 1436 OMPBuilder->popFinalizationCB(); 1437 } 1438 llvm::OpenMPIRBuilder *OMPBuilder; 1439 }; 1440 } // namespace 1441 1442 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1443 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1444 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1445 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1446 assert(ThreadIDVar->getType()->isPointerType() && 1447 "thread id variable must be of type kmp_int32 *"); 1448 CodeGenFunction CGF(CGM, true); 1449 bool HasCancel = false; 1450 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1451 HasCancel = OPD->hasCancel(); 1452 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1453 HasCancel = OPSD->hasCancel(); 1454 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1455 HasCancel = OPFD->hasCancel(); 1456 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1457 HasCancel = OPFD->hasCancel(); 1458 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1459 HasCancel = OPFD->hasCancel(); 1460 else if (const auto *OPFD = 1461 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1462 HasCancel = OPFD->hasCancel(); 1463 else if (const auto *OPFD = 1464 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1465 HasCancel = OPFD->hasCancel(); 1466 1467 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1468 // parallel region to make cancellation barriers work properly. 
1469 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1470 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1471 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1472 HasCancel, OutlinedHelperName); 1473 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1474 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1475 } 1476 1477 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1478 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1479 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1480 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1481 return emitParallelOrTeamsOutlinedFunction( 1482 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1483 } 1484 1485 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1486 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1487 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1488 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1489 return emitParallelOrTeamsOutlinedFunction( 1490 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1491 } 1492 1493 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1494 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1495 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1496 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1497 bool Tied, unsigned &NumberOfParts) { 1498 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1499 PrePostActionTy &) { 1500 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1501 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1502 llvm::Value *TaskArgs[] = { 1503 UpLoc, ThreadID, 1504 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1505 TaskTVar->getType()->castAs<PointerType>()) 1506 .getPointer(CGF)}; 1507 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1508 }; 1509 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1510 UntiedCodeGen); 1511 CodeGen.setAction(Action); 1512 assert(!ThreadIDVar->getType()->isPointerType() && 1513 "thread id variable must be of type kmp_int32 for tasks"); 1514 const OpenMPDirectiveKind Region = 1515 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1516 : OMPD_task; 1517 const CapturedStmt *CS = D.getCapturedStmt(Region); 1518 bool HasCancel = false; 1519 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1520 HasCancel = TD->hasCancel(); 1521 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1522 HasCancel = TD->hasCancel(); 1523 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1524 HasCancel = TD->hasCancel(); 1525 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1526 HasCancel = TD->hasCancel(); 1527 1528 CodeGenFunction CGF(CGM, true); 1529 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1530 InnermostKind, HasCancel, Action); 1531 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1532 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1533 if (!Tied) 1534 NumberOfParts = Action.getNumberOfParts(); 1535 return Res; 1536 } 1537 1538 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1539 const RecordDecl *RD, const CGRecordLayout &RL, 1540 ArrayRef<llvm::Constant *> Data) { 1541 llvm::StructType *StructTy = RL.getLLVMType(); 1542 unsigned PrevIdx = 0; 1543 ConstantInitBuilder CIBuilder(CGM); 1544 auto DI = Data.begin(); 1545 for (const FieldDecl *FD : RD->fields()) { 1546 unsigned Idx = RL.getLLVMFieldNo(FD); 1547 // Fill the alignment. 
1548 for (unsigned I = PrevIdx; I < Idx; ++I) 1549 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1550 PrevIdx = Idx + 1; 1551 Fields.add(*DI); 1552 ++DI; 1553 } 1554 } 1555 1556 template <class... As> 1557 static llvm::GlobalVariable * 1558 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1559 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1560 As &&... Args) { 1561 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1562 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1563 ConstantInitBuilder CIBuilder(CGM); 1564 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1565 buildStructValue(Fields, CGM, RD, RL, Data); 1566 return Fields.finishAndCreateGlobal( 1567 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1568 std::forward<As>(Args)...); 1569 } 1570 1571 template <typename T> 1572 static void 1573 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1574 ArrayRef<llvm::Constant *> Data, 1575 T &Parent) { 1576 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1577 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1578 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1579 buildStructValue(Fields, CGM, RD, RL, Data); 1580 Fields.finishAndAddTo(Parent); 1581 } 1582 1583 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1584 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1585 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1586 FlagsTy FlagsKey(Flags, Reserved2Flags); 1587 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1588 if (!Entry) { 1589 if (!DefaultOpenMPPSource) { 1590 // Initialize default location for psource field of ident_t structure of 1591 // all ident_t objects. Format is ";file;function;line;column;;". 
1592 // Taken from 1593 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1594 DefaultOpenMPPSource = 1595 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1596 DefaultOpenMPPSource = 1597 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1598 } 1599 1600 llvm::Constant *Data[] = { 1601 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1602 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1603 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1604 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1605 llvm::GlobalValue *DefaultOpenMPLocation = 1606 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1607 llvm::GlobalValue::PrivateLinkage); 1608 DefaultOpenMPLocation->setUnnamedAddr( 1609 llvm::GlobalValue::UnnamedAddr::Global); 1610 1611 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1612 } 1613 return Address(Entry, Align); 1614 } 1615 1616 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1617 bool AtCurrentPoint) { 1618 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1619 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1620 1621 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1622 if (AtCurrentPoint) { 1623 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1624 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1625 } else { 1626 Elem.second.ServiceInsertPt = 1627 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1628 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1629 } 1630 } 1631 1632 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1633 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1634 if (Elem.second.ServiceInsertPt) { 1635 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1636 Elem.second.ServiceInsertPt = nullptr; 1637 Ptr->eraseFromParent(); 1638 } 1639 } 1640 1641 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1642 SourceLocation Loc, 1643 unsigned Flags) { 1644 Flags |= OMP_IDENT_KMPC; 1645 // If no debug info is generated - return global default location. 1646 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1647 Loc.isInvalid()) 1648 return getOrCreateDefaultLocation(Flags).getPointer(); 1649 1650 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1651 1652 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1653 Address LocValue = Address::invalid(); 1654 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1655 if (I != OpenMPLocThreadIDMap.end()) 1656 LocValue = Address(I->second.DebugLoc, Align); 1657 1658 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1659 // GetOpenMPThreadID was called before this routine. 1660 if (!LocValue.isValid()) { 1661 // Generate "ident_t .kmpc_loc.addr;" 1662 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1663 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1664 Elem.second.DebugLoc = AI.getPointer(); 1665 LocValue = AI; 1666 1667 if (!Elem.second.ServiceInsertPt) 1668 setLocThreadIdInsertPt(CGF); 1669 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1670 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1671 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1672 CGF.getTypeSize(IdentQTy)); 1673 } 1674 1675 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1676 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1677 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1678 LValue PSource = 1679 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1680 1681 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1682 if (OMPDebugLoc == nullptr) { 1683 SmallString<128> Buffer2; 1684 llvm::raw_svector_ostream OS2(Buffer2); 1685 // Build debug location 1686 PresumedLoc PLoc = 
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1687 OS2 << ";" << PLoc.getFilename() << ";"; 1688 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1689 OS2 << FD->getQualifiedNameAsString(); 1690 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1691 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1692 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1693 } 1694 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1695 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1696 1697 // Our callers always pass this to a runtime function, so for 1698 // convenience, go ahead and return a naked pointer. 1699 return LocValue.getPointer(); 1700 } 1701 1702 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1703 SourceLocation Loc) { 1704 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1705 1706 llvm::Value *ThreadID = nullptr; 1707 // Check whether we've already cached a load of the thread id in this 1708 // function. 1709 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1710 if (I != OpenMPLocThreadIDMap.end()) { 1711 ThreadID = I->second.ThreadID; 1712 if (ThreadID != nullptr) 1713 return ThreadID; 1714 } 1715 // If exceptions are enabled, do not use parameter to avoid possible crash. 1716 if (auto *OMPRegionInfo = 1717 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1718 if (OMPRegionInfo->getThreadIDVariable()) { 1719 // Check if this an outlined function with thread id passed as argument. 
1720 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1721 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1722 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1723 !CGF.getLangOpts().CXXExceptions || 1724 CGF.Builder.GetInsertBlock() == TopBlock || 1725 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1726 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1727 TopBlock || 1728 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1729 CGF.Builder.GetInsertBlock()) { 1730 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1731 // If value loaded in entry block, cache it and use it everywhere in 1732 // function. 1733 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1734 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1735 Elem.second.ThreadID = ThreadID; 1736 } 1737 return ThreadID; 1738 } 1739 } 1740 } 1741 1742 // This is not an outlined function region - need to call __kmpc_int32 1743 // kmpc_global_thread_num(ident_t *loc). 1744 // Generate thread id value and cache this value for use across the 1745 // function. 
1746 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1747 if (!Elem.second.ServiceInsertPt) 1748 setLocThreadIdInsertPt(CGF); 1749 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1750 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1751 llvm::CallInst *Call = CGF.Builder.CreateCall( 1752 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1753 emitUpdateLocation(CGF, Loc)); 1754 Call->setCallingConv(CGF.getRuntimeCC()); 1755 Elem.second.ThreadID = Call; 1756 return Call; 1757 } 1758 1759 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1760 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1761 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1762 clearLocThreadIdInsertPt(CGF); 1763 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1764 } 1765 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1766 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1767 UDRMap.erase(D); 1768 FunctionUDRMap.erase(CGF.CurFn); 1769 } 1770 auto I = FunctionUDMMap.find(CGF.CurFn); 1771 if (I != FunctionUDMMap.end()) { 1772 for(const auto *D : I->second) 1773 UDMMap.erase(D); 1774 FunctionUDMMap.erase(I); 1775 } 1776 LastprivateConditionalToTypes.erase(CGF.CurFn); 1777 } 1778 1779 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1780 return IdentTy->getPointerTo(); 1781 } 1782 1783 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1784 if (!Kmpc_MicroTy) { 1785 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1786 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1787 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1788 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1789 } 1790 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1791 } 1792 1793 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1794 llvm::FunctionCallee RTLFn = nullptr; 1795 switch (static_cast<OpenMPRTLFunction>(Function)) { 1796 case OMPRTL__kmpc_fork_call: { 1797 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1798 // microtask, ...); 1799 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1800 getKmpc_MicroPointerTy()}; 1801 auto *FnTy = 1802 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1803 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1804 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1805 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1806 llvm::LLVMContext &Ctx = F->getContext(); 1807 llvm::MDBuilder MDB(Ctx); 1808 // Annotate the callback behavior of the __kmpc_fork_call: 1809 // - The callback callee is argument number 2 (microtask). 1810 // - The first two arguments of the callback callee are unknown (-1). 1811 // - All variadic arguments to the __kmpc_fork_call are passed to the 1812 // callback callee. 
1813 F->addMetadata( 1814 llvm::LLVMContext::MD_callback, 1815 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1816 2, {-1, -1}, 1817 /* VarArgsArePassed */ true)})); 1818 } 1819 } 1820 break; 1821 } 1822 case OMPRTL__kmpc_global_thread_num: { 1823 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1824 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1825 auto *FnTy = 1826 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1827 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1828 break; 1829 } 1830 case OMPRTL__kmpc_threadprivate_cached: { 1831 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1832 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1833 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1834 CGM.VoidPtrTy, CGM.SizeTy, 1835 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1836 auto *FnTy = 1837 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1838 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1839 break; 1840 } 1841 case OMPRTL__kmpc_critical: { 1842 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1843 // kmp_critical_name *crit); 1844 llvm::Type *TypeParams[] = { 1845 getIdentTyPointerTy(), CGM.Int32Ty, 1846 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1847 auto *FnTy = 1848 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1849 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1850 break; 1851 } 1852 case OMPRTL__kmpc_critical_with_hint: { 1853 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1854 // kmp_critical_name *crit, uintptr_t hint); 1855 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1856 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1857 CGM.IntPtrTy}; 1858 auto *FnTy = 1859 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1860 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1861 break; 1862 } 1863 case OMPRTL__kmpc_threadprivate_register: { 1864 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1865 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1866 // typedef void *(*kmpc_ctor)(void *); 1867 auto *KmpcCtorTy = 1868 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1869 /*isVarArg*/ false)->getPointerTo(); 1870 // typedef void *(*kmpc_cctor)(void *, void *); 1871 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1872 auto *KmpcCopyCtorTy = 1873 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1874 /*isVarArg*/ false) 1875 ->getPointerTo(); 1876 // typedef void (*kmpc_dtor)(void *); 1877 auto *KmpcDtorTy = 1878 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1879 ->getPointerTo(); 1880 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1881 KmpcCopyCtorTy, KmpcDtorTy}; 1882 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1883 /*isVarArg*/ false); 1884 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1885 break; 1886 } 1887 case OMPRTL__kmpc_end_critical: { 1888 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1889 // kmp_critical_name *crit); 1890 llvm::Type *TypeParams[] = { 1891 getIdentTyPointerTy(), CGM.Int32Ty, 1892 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1893 auto *FnTy = 1894 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1895 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1896 break; 1897 } 1898 case OMPRTL__kmpc_cancel_barrier: { 1899 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1900 // global_tid); 1901 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1902 auto *FnTy = 1903 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1904 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1905 break; 1906 } 1907 case 
OMPRTL__kmpc_barrier: { 1908 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1909 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1910 auto *FnTy = 1911 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1912 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_for_static_fini: { 1916 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1917 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1918 auto *FnTy = 1919 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1920 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1921 break; 1922 } 1923 case OMPRTL__kmpc_push_num_threads: { 1924 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1925 // kmp_int32 num_threads) 1926 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1927 CGM.Int32Ty}; 1928 auto *FnTy = 1929 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1930 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1931 break; 1932 } 1933 case OMPRTL__kmpc_serialized_parallel: { 1934 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1935 // global_tid); 1936 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1937 auto *FnTy = 1938 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1939 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1940 break; 1941 } 1942 case OMPRTL__kmpc_end_serialized_parallel: { 1943 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1944 // global_tid); 1945 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1946 auto *FnTy = 1947 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1948 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1949 break; 1950 } 1951 case OMPRTL__kmpc_flush: { 1952 // Build void 
__kmpc_flush(ident_t *loc); 1953 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1954 auto *FnTy = 1955 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1956 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1957 break; 1958 } 1959 case OMPRTL__kmpc_master: { 1960 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1961 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1962 auto *FnTy = 1963 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1964 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1965 break; 1966 } 1967 case OMPRTL__kmpc_end_master: { 1968 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1969 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1970 auto *FnTy = 1971 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1972 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1973 break; 1974 } 1975 case OMPRTL__kmpc_omp_taskyield: { 1976 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1977 // int end_part); 1978 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1979 auto *FnTy = 1980 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1981 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1982 break; 1983 } 1984 case OMPRTL__kmpc_single: { 1985 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1986 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1987 auto *FnTy = 1988 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1989 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1990 break; 1991 } 1992 case OMPRTL__kmpc_end_single: { 1993 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1994 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1995 auto *FnTy = 1996 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1997 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1998 break; 1999 } 2000 case OMPRTL__kmpc_omp_task_alloc: { 2001 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2002 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2003 // kmp_routine_entry_t *task_entry); 2004 assert(KmpRoutineEntryPtrTy != nullptr && 2005 "Type kmp_routine_entry_t must be created."); 2006 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2007 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2008 // Return void * and then cast to particular kmp_task_t type. 2009 auto *FnTy = 2010 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2011 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2012 break; 2013 } 2014 case OMPRTL__kmpc_omp_target_task_alloc: { 2015 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2016 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2017 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2018 assert(KmpRoutineEntryPtrTy != nullptr && 2019 "Type kmp_routine_entry_t must be created."); 2020 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2021 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2022 CGM.Int64Ty}; 2023 // Return void * and then cast to particular kmp_task_t type. 
2024 auto *FnTy = 2025 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2026 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2027 break; 2028 } 2029 case OMPRTL__kmpc_omp_task: { 2030 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2031 // *new_task); 2032 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2033 CGM.VoidPtrTy}; 2034 auto *FnTy = 2035 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2036 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2037 break; 2038 } 2039 case OMPRTL__kmpc_copyprivate: { 2040 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2041 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2042 // kmp_int32 didit); 2043 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2044 auto *CpyFnTy = 2045 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2046 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2047 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2048 CGM.Int32Ty}; 2049 auto *FnTy = 2050 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2051 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2052 break; 2053 } 2054 case OMPRTL__kmpc_reduce: { 2055 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2056 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2057 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2058 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2059 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2060 /*isVarArg=*/false); 2061 llvm::Type *TypeParams[] = { 2062 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2063 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2064 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2065 auto *FnTy = 2066 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2067 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2068 break; 2069 } 2070 case OMPRTL__kmpc_reduce_nowait: { 2071 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2072 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2073 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2074 // *lck); 2075 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2076 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2077 /*isVarArg=*/false); 2078 llvm::Type *TypeParams[] = { 2079 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2080 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2081 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2082 auto *FnTy = 2083 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2084 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2085 break; 2086 } 2087 case OMPRTL__kmpc_end_reduce: { 2088 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2089 // kmp_critical_name *lck); 2090 llvm::Type *TypeParams[] = { 2091 getIdentTyPointerTy(), CGM.Int32Ty, 2092 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2093 auto *FnTy = 2094 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2095 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2096 break; 2097 } 2098 case OMPRTL__kmpc_end_reduce_nowait: { 2099 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2100 // kmp_critical_name *lck); 2101 llvm::Type *TypeParams[] = { 2102 getIdentTyPointerTy(), CGM.Int32Ty, 2103 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2104 auto *FnTy = 2105 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2106 RTLFn = 2107 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2108 break; 2109 } 2110 case OMPRTL__kmpc_omp_task_begin_if0: { 
2111 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2112 // *new_task); 2113 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2114 CGM.VoidPtrTy}; 2115 auto *FnTy = 2116 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2117 RTLFn = 2118 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2119 break; 2120 } 2121 case OMPRTL__kmpc_omp_task_complete_if0: { 2122 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2123 // *new_task); 2124 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2125 CGM.VoidPtrTy}; 2126 auto *FnTy = 2127 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2128 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2129 /*Name=*/"__kmpc_omp_task_complete_if0"); 2130 break; 2131 } 2132 case OMPRTL__kmpc_ordered: { 2133 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2134 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2135 auto *FnTy = 2136 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2137 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2138 break; 2139 } 2140 case OMPRTL__kmpc_end_ordered: { 2141 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2142 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2143 auto *FnTy = 2144 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2145 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2146 break; 2147 } 2148 case OMPRTL__kmpc_omp_taskwait: { 2149 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2150 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2151 auto *FnTy = 2152 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2153 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2154 break; 2155 } 2156 case OMPRTL__kmpc_taskgroup: { 2157 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2158 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2159 auto *FnTy = 2160 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2161 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2162 break; 2163 } 2164 case OMPRTL__kmpc_end_taskgroup: { 2165 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2166 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2167 auto *FnTy = 2168 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2169 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2170 break; 2171 } 2172 case OMPRTL__kmpc_push_proc_bind: { 2173 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2174 // int proc_bind) 2175 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2176 auto *FnTy = 2177 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2178 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2179 break; 2180 } 2181 case OMPRTL__kmpc_omp_task_with_deps: { 2182 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2183 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2184 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2185 llvm::Type *TypeParams[] = { 2186 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2187 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2188 auto *FnTy = 2189 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2190 RTLFn = 2191 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2192 break; 2193 } 2194 case OMPRTL__kmpc_omp_wait_deps: { 2195 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2196 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2197 // kmp_depend_info_t *noalias_dep_list); 2198 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2199 CGM.Int32Ty, CGM.VoidPtrTy, 2200 
CGM.Int32Ty, CGM.VoidPtrTy}; 2201 auto *FnTy = 2202 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2203 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2204 break; 2205 } 2206 case OMPRTL__kmpc_cancellationpoint: { 2207 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2208 // global_tid, kmp_int32 cncl_kind) 2209 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2210 auto *FnTy = 2211 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2212 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2213 break; 2214 } 2215 case OMPRTL__kmpc_cancel: { 2216 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2217 // kmp_int32 cncl_kind) 2218 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2219 auto *FnTy = 2220 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2221 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2222 break; 2223 } 2224 case OMPRTL__kmpc_push_num_teams: { 2225 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2226 // kmp_int32 num_teams, kmp_int32 num_threads) 2227 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2228 CGM.Int32Ty}; 2229 auto *FnTy = 2230 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2231 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2232 break; 2233 } 2234 case OMPRTL__kmpc_fork_teams: { 2235 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2236 // microtask, ...); 2237 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2238 getKmpc_MicroPointerTy()}; 2239 auto *FnTy = 2240 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2241 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2242 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2243 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2244 llvm::LLVMContext &Ctx = F->getContext(); 2245 llvm::MDBuilder MDB(Ctx); 2246 // Annotate the callback behavior of the __kmpc_fork_teams: 2247 // - The callback callee is argument number 2 (microtask). 2248 // - The first two arguments of the callback callee are unknown (-1). 2249 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2250 // callback callee. 2251 F->addMetadata( 2252 llvm::LLVMContext::MD_callback, 2253 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2254 2, {-1, -1}, 2255 /* VarArgsArePassed */ true)})); 2256 } 2257 } 2258 break; 2259 } 2260 case OMPRTL__kmpc_taskloop: { 2261 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2262 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2263 // sched, kmp_uint64 grainsize, void *task_dup); 2264 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2265 CGM.IntTy, 2266 CGM.VoidPtrTy, 2267 CGM.IntTy, 2268 CGM.Int64Ty->getPointerTo(), 2269 CGM.Int64Ty->getPointerTo(), 2270 CGM.Int64Ty, 2271 CGM.IntTy, 2272 CGM.IntTy, 2273 CGM.Int64Ty, 2274 CGM.VoidPtrTy}; 2275 auto *FnTy = 2276 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2277 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2278 break; 2279 } 2280 case OMPRTL__kmpc_doacross_init: { 2281 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2282 // num_dims, struct kmp_dim *dims); 2283 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2284 CGM.Int32Ty, 2285 CGM.Int32Ty, 2286 CGM.VoidPtrTy}; 2287 auto *FnTy = 2288 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2289 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2290 break; 2291 } 2292 case OMPRTL__kmpc_doacross_fini: { 2293 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2294 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2295 auto *FnTy 
= 2296 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2297 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2298 break; 2299 } 2300 case OMPRTL__kmpc_doacross_post: { 2301 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2302 // *vec); 2303 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2304 CGM.Int64Ty->getPointerTo()}; 2305 auto *FnTy = 2306 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2307 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2308 break; 2309 } 2310 case OMPRTL__kmpc_doacross_wait: { 2311 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2312 // *vec); 2313 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2314 CGM.Int64Ty->getPointerTo()}; 2315 auto *FnTy = 2316 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2317 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2318 break; 2319 } 2320 case OMPRTL__kmpc_task_reduction_init: { 2321 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2322 // *data); 2323 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2324 auto *FnTy = 2325 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2326 RTLFn = 2327 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2328 break; 2329 } 2330 case OMPRTL__kmpc_task_reduction_get_th_data: { 2331 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2332 // *d); 2333 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2334 auto *FnTy = 2335 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2336 RTLFn = CGM.CreateRuntimeFunction( 2337 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2338 break; 2339 } 2340 case OMPRTL__kmpc_alloc: { 2341 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2342 // 
al); omp_allocator_handle_t type is void *. 2343 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2344 auto *FnTy = 2345 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2346 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2347 break; 2348 } 2349 case OMPRTL__kmpc_free: { 2350 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2351 // al); omp_allocator_handle_t type is void *. 2352 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2353 auto *FnTy = 2354 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2355 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2356 break; 2357 } 2358 case OMPRTL__kmpc_push_target_tripcount: { 2359 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2360 // size); 2361 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2362 llvm::FunctionType *FnTy = 2363 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2364 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2365 break; 2366 } 2367 case OMPRTL__tgt_target: { 2368 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2369 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2370 // *arg_types); 2371 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2372 CGM.VoidPtrTy, 2373 CGM.Int32Ty, 2374 CGM.VoidPtrPtrTy, 2375 CGM.VoidPtrPtrTy, 2376 CGM.Int64Ty->getPointerTo(), 2377 CGM.Int64Ty->getPointerTo()}; 2378 auto *FnTy = 2379 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2380 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2381 break; 2382 } 2383 case OMPRTL__tgt_target_nowait: { 2384 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2385 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2386 // int64_t *arg_types); 2387 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2388 CGM.VoidPtrTy, 2389 
CGM.Int32Ty, 2390 CGM.VoidPtrPtrTy, 2391 CGM.VoidPtrPtrTy, 2392 CGM.Int64Ty->getPointerTo(), 2393 CGM.Int64Ty->getPointerTo()}; 2394 auto *FnTy = 2395 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2396 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2397 break; 2398 } 2399 case OMPRTL__tgt_target_teams: { 2400 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2401 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2402 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2403 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2404 CGM.VoidPtrTy, 2405 CGM.Int32Ty, 2406 CGM.VoidPtrPtrTy, 2407 CGM.VoidPtrPtrTy, 2408 CGM.Int64Ty->getPointerTo(), 2409 CGM.Int64Ty->getPointerTo(), 2410 CGM.Int32Ty, 2411 CGM.Int32Ty}; 2412 auto *FnTy = 2413 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2414 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2415 break; 2416 } 2417 case OMPRTL__tgt_target_teams_nowait: { 2418 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2419 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2420 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2421 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2422 CGM.VoidPtrTy, 2423 CGM.Int32Ty, 2424 CGM.VoidPtrPtrTy, 2425 CGM.VoidPtrPtrTy, 2426 CGM.Int64Ty->getPointerTo(), 2427 CGM.Int64Ty->getPointerTo(), 2428 CGM.Int32Ty, 2429 CGM.Int32Ty}; 2430 auto *FnTy = 2431 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2432 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2433 break; 2434 } 2435 case OMPRTL__tgt_register_requires: { 2436 // Build void __tgt_register_requires(int64_t flags); 2437 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2438 auto *FnTy = 2439 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2440 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2441 
break; 2442 } 2443 case OMPRTL__tgt_target_data_begin: { 2444 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2445 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2446 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2447 CGM.Int32Ty, 2448 CGM.VoidPtrPtrTy, 2449 CGM.VoidPtrPtrTy, 2450 CGM.Int64Ty->getPointerTo(), 2451 CGM.Int64Ty->getPointerTo()}; 2452 auto *FnTy = 2453 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2454 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2455 break; 2456 } 2457 case OMPRTL__tgt_target_data_begin_nowait: { 2458 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2459 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2460 // *arg_types); 2461 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2462 CGM.Int32Ty, 2463 CGM.VoidPtrPtrTy, 2464 CGM.VoidPtrPtrTy, 2465 CGM.Int64Ty->getPointerTo(), 2466 CGM.Int64Ty->getPointerTo()}; 2467 auto *FnTy = 2468 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2469 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2470 break; 2471 } 2472 case OMPRTL__tgt_target_data_end: { 2473 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2474 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2475 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2476 CGM.Int32Ty, 2477 CGM.VoidPtrPtrTy, 2478 CGM.VoidPtrPtrTy, 2479 CGM.Int64Ty->getPointerTo(), 2480 CGM.Int64Ty->getPointerTo()}; 2481 auto *FnTy = 2482 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2483 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2484 break; 2485 } 2486 case OMPRTL__tgt_target_data_end_nowait: { 2487 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2488 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2489 // *arg_types); 2490 llvm::Type *TypeParams[] = {CGM.Int64Ty, 
2491 CGM.Int32Ty, 2492 CGM.VoidPtrPtrTy, 2493 CGM.VoidPtrPtrTy, 2494 CGM.Int64Ty->getPointerTo(), 2495 CGM.Int64Ty->getPointerTo()}; 2496 auto *FnTy = 2497 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2498 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2499 break; 2500 } 2501 case OMPRTL__tgt_target_data_update: { 2502 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2503 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2504 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2505 CGM.Int32Ty, 2506 CGM.VoidPtrPtrTy, 2507 CGM.VoidPtrPtrTy, 2508 CGM.Int64Ty->getPointerTo(), 2509 CGM.Int64Ty->getPointerTo()}; 2510 auto *FnTy = 2511 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2512 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2513 break; 2514 } 2515 case OMPRTL__tgt_target_data_update_nowait: { 2516 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2517 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2518 // *arg_types); 2519 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2520 CGM.Int32Ty, 2521 CGM.VoidPtrPtrTy, 2522 CGM.VoidPtrPtrTy, 2523 CGM.Int64Ty->getPointerTo(), 2524 CGM.Int64Ty->getPointerTo()}; 2525 auto *FnTy = 2526 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2527 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2528 break; 2529 } 2530 case OMPRTL__tgt_mapper_num_components: { 2531 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2532 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2533 auto *FnTy = 2534 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2535 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2536 break; 2537 } 2538 case OMPRTL__tgt_push_mapper_component: { 2539 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2540 // *base, void *begin, 
int64_t size, int64_t type); 2541 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2542 CGM.Int64Ty, CGM.Int64Ty}; 2543 auto *FnTy = 2544 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2545 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2546 break; 2547 } 2548 case OMPRTL__kmpc_task_allow_completion_event: { 2549 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, 2550 // int gtid, kmp_task_t *task); 2551 auto *FnTy = llvm::FunctionType::get( 2552 CGM.VoidPtrTy, {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy}, 2553 /*isVarArg=*/false); 2554 RTLFn = 2555 CGM.CreateRuntimeFunction(FnTy, "__kmpc_task_allow_completion_event"); 2556 break; 2557 } 2558 } 2559 assert(RTLFn && "Unable to find OpenMP runtime function"); 2560 return RTLFn; 2561 } 2562 2563 llvm::FunctionCallee 2564 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2565 assert((IVSize == 32 || IVSize == 64) && 2566 "IV size is not compatible with the omp runtime"); 2567 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2568 : "__kmpc_for_static_init_4u") 2569 : (IVSigned ? "__kmpc_for_static_init_8" 2570 : "__kmpc_for_static_init_8u"); 2571 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2572 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2573 llvm::Type *TypeParams[] = { 2574 getIdentTyPointerTy(), // loc 2575 CGM.Int32Ty, // tid 2576 CGM.Int32Ty, // schedtype 2577 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2578 PtrTy, // p_lower 2579 PtrTy, // p_upper 2580 PtrTy, // p_stride 2581 ITy, // incr 2582 ITy // chunk 2583 }; 2584 auto *FnTy = 2585 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2586 return CGM.CreateRuntimeFunction(FnTy, Name); 2587 } 2588 2589 llvm::FunctionCallee 2590 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2591 assert((IVSize == 32 || IVSize == 64) && 2592 "IV size is not compatible with the omp runtime"); 2593 StringRef Name = 2594 IVSize == 32 2595 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2596 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2597 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2598 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2599 CGM.Int32Ty, // tid 2600 CGM.Int32Ty, // schedtype 2601 ITy, // lower 2602 ITy, // upper 2603 ITy, // stride 2604 ITy // chunk 2605 }; 2606 auto *FnTy = 2607 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2608 return CGM.CreateRuntimeFunction(FnTy, Name); 2609 } 2610 2611 llvm::FunctionCallee 2612 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2613 assert((IVSize == 32 || IVSize == 64) && 2614 "IV size is not compatible with the omp runtime"); 2615 StringRef Name = 2616 IVSize == 32 2617 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2618 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2619 llvm::Type *TypeParams[] = { 2620 getIdentTyPointerTy(), // loc 2621 CGM.Int32Ty, // tid 2622 }; 2623 auto *FnTy = 2624 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2625 return CGM.CreateRuntimeFunction(FnTy, Name); 2626 } 2627 2628 llvm::FunctionCallee 2629 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2630 assert((IVSize == 32 || IVSize == 64) && 2631 "IV size is not compatible with the omp runtime"); 2632 StringRef Name = 2633 IVSize == 32 2634 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2635 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2636 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2637 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2638 llvm::Type *TypeParams[] = { 2639 getIdentTyPointerTy(), // loc 2640 CGM.Int32Ty, // tid 2641 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2642 PtrTy, // p_lower 2643 PtrTy, // p_upper 2644 PtrTy // p_stride 2645 }; 2646 auto *FnTy = 2647 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2648 return CGM.CreateRuntimeFunction(FnTy, Name); 2649 } 2650 2651 /// Obtain information that uniquely identifies a target entry. This 2652 /// consists of the file and device IDs as well as line number associated with 2653 /// the relevant entry source location. 
2654 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2655 unsigned &DeviceID, unsigned &FileID, 2656 unsigned &LineNum) { 2657 SourceManager &SM = C.getSourceManager(); 2658 2659 // The loc should be always valid and have a file ID (the user cannot use 2660 // #pragma directives in macros) 2661 2662 assert(Loc.isValid() && "Source location is expected to be always valid."); 2663 2664 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2665 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2666 2667 llvm::sys::fs::UniqueID ID; 2668 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2669 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2670 << PLoc.getFilename() << EC.message(); 2671 2672 DeviceID = ID.getDevice(); 2673 FileID = ID.getFile(); 2674 LineNum = PLoc.getLine(); 2675 } 2676 2677 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2678 if (CGM.getLangOpts().OpenMPSimd) 2679 return Address::invalid(); 2680 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2681 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2682 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2683 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2684 HasRequiresUnifiedSharedMemory))) { 2685 SmallString<64> PtrName; 2686 { 2687 llvm::raw_svector_ostream OS(PtrName); 2688 OS << CGM.getMangledName(GlobalDecl(VD)); 2689 if (!VD->isExternallyVisible()) { 2690 unsigned DeviceID, FileID, Line; 2691 getTargetEntryUniqueInfo(CGM.getContext(), 2692 VD->getCanonicalDecl()->getBeginLoc(), 2693 DeviceID, FileID, Line); 2694 OS << llvm::format("_%x", FileID); 2695 } 2696 OS << "_decl_tgt_ref_ptr"; 2697 } 2698 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2699 if (!Ptr) { 2700 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2701 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2702 PtrName); 2703 2704 auto *GV = cast<llvm::GlobalVariable>(Ptr); 
2705 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2706 2707 if (!CGM.getLangOpts().OpenMPIsDevice) 2708 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2709 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2710 } 2711 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2712 } 2713 return Address::invalid(); 2714 } 2715 2716 llvm::Constant * 2717 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2718 assert(!CGM.getLangOpts().OpenMPUseTLS || 2719 !CGM.getContext().getTargetInfo().isTLSSupported()); 2720 // Lookup the entry, lazily creating it if necessary. 2721 std::string Suffix = getName({"cache", ""}); 2722 return getOrCreateInternalVariable( 2723 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2724 } 2725 2726 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2727 const VarDecl *VD, 2728 Address VDAddr, 2729 SourceLocation Loc) { 2730 if (CGM.getLangOpts().OpenMPUseTLS && 2731 CGM.getContext().getTargetInfo().isTLSSupported()) 2732 return VDAddr; 2733 2734 llvm::Type *VarTy = VDAddr.getElementType(); 2735 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2736 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2737 CGM.Int8PtrTy), 2738 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2739 getOrCreateThreadPrivateCache(VD)}; 2740 return Address(CGF.EmitRuntimeCall( 2741 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2742 VDAddr.getAlignment()); 2743 } 2744 2745 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2746 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2747 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2748 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2749 // library. 
2750 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2751 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2752 OMPLoc); 2753 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2754 // to register constructor/destructor for variable. 2755 llvm::Value *Args[] = { 2756 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2757 Ctor, CopyCtor, Dtor}; 2758 CGF.EmitRuntimeCall( 2759 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2760 } 2761 2762 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2763 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2764 bool PerformInit, CodeGenFunction *CGF) { 2765 if (CGM.getLangOpts().OpenMPUseTLS && 2766 CGM.getContext().getTargetInfo().isTLSSupported()) 2767 return nullptr; 2768 2769 VD = VD->getDefinition(CGM.getContext()); 2770 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2771 QualType ASTTy = VD->getType(); 2772 2773 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2774 const Expr *Init = VD->getAnyInitializer(); 2775 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2776 // Generate function that re-emits the declaration's initializer into the 2777 // threadprivate copy of the variable VD 2778 CodeGenFunction CtorCGF(CGM); 2779 FunctionArgList Args; 2780 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2781 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2782 ImplicitParamDecl::Other); 2783 Args.push_back(&Dst); 2784 2785 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2786 CGM.getContext().VoidPtrTy, Args); 2787 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2788 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2789 llvm::Function *Fn = 2790 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2791 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2792 Args, Loc, Loc); 2793 
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2794 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2795 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2796 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2797 Arg = CtorCGF.Builder.CreateElementBitCast( 2798 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2799 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2800 /*IsInitializer=*/true); 2801 ArgVal = CtorCGF.EmitLoadOfScalar( 2802 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2803 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2804 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2805 CtorCGF.FinishFunction(); 2806 Ctor = Fn; 2807 } 2808 if (VD->getType().isDestructedType() != QualType::DK_none) { 2809 // Generate function that emits destructor call for the threadprivate copy 2810 // of the variable VD 2811 CodeGenFunction DtorCGF(CGM); 2812 FunctionArgList Args; 2813 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2814 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2815 ImplicitParamDecl::Other); 2816 Args.push_back(&Dst); 2817 2818 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2819 CGM.getContext().VoidTy, Args); 2820 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2821 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2822 llvm::Function *Fn = 2823 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2824 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2825 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2826 Loc, Loc); 2827 // Create a scope with an artificial location for the body of this function. 
2828 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2829 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2830 DtorCGF.GetAddrOfLocalVar(&Dst), 2831 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2832 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2833 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2834 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2835 DtorCGF.FinishFunction(); 2836 Dtor = Fn; 2837 } 2838 // Do not emit init function if it is not required. 2839 if (!Ctor && !Dtor) 2840 return nullptr; 2841 2842 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2843 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2844 /*isVarArg=*/false) 2845 ->getPointerTo(); 2846 // Copying constructor for the threadprivate variable. 2847 // Must be NULL - reserved by runtime, but currently it requires that this 2848 // parameter is always NULL. Otherwise it fires assertion. 2849 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2850 if (Ctor == nullptr) { 2851 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2852 /*isVarArg=*/false) 2853 ->getPointerTo(); 2854 Ctor = llvm::Constant::getNullValue(CtorTy); 2855 } 2856 if (Dtor == nullptr) { 2857 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2858 /*isVarArg=*/false) 2859 ->getPointerTo(); 2860 Dtor = llvm::Constant::getNullValue(DtorTy); 2861 } 2862 if (!CGF) { 2863 auto *InitFunctionTy = 2864 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2865 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2866 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2867 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2868 CodeGenFunction InitCGF(CGM); 2869 FunctionArgList ArgList; 2870 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2871 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2872 Loc, Loc); 2873 
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2874 InitCGF.FinishFunction(); 2875 return InitFunction; 2876 } 2877 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2878 } 2879 return nullptr; 2880 } 2881 2882 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2883 llvm::GlobalVariable *Addr, 2884 bool PerformInit) { 2885 if (CGM.getLangOpts().OMPTargetTriples.empty() && 2886 !CGM.getLangOpts().OpenMPIsDevice) 2887 return false; 2888 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2889 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2890 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2891 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2892 HasRequiresUnifiedSharedMemory)) 2893 return CGM.getLangOpts().OpenMPIsDevice; 2894 VD = VD->getDefinition(CGM.getContext()); 2895 assert(VD && "Unknown VarDecl"); 2896 2897 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2898 return CGM.getLangOpts().OpenMPIsDevice; 2899 2900 QualType ASTTy = VD->getType(); 2901 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2902 2903 // Produce the unique prefix to identify the new target regions. We use 2904 // the source location of the variable declaration which we know to not 2905 // conflict with any target region. 
2906 unsigned DeviceID; 2907 unsigned FileID; 2908 unsigned Line; 2909 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2910 SmallString<128> Buffer, Out; 2911 { 2912 llvm::raw_svector_ostream OS(Buffer); 2913 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2914 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2915 } 2916 2917 const Expr *Init = VD->getAnyInitializer(); 2918 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2919 llvm::Constant *Ctor; 2920 llvm::Constant *ID; 2921 if (CGM.getLangOpts().OpenMPIsDevice) { 2922 // Generate function that re-emits the declaration's initializer into 2923 // the threadprivate copy of the variable VD 2924 CodeGenFunction CtorCGF(CGM); 2925 2926 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2927 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2928 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2929 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2930 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2931 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2932 FunctionArgList(), Loc, Loc); 2933 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2934 CtorCGF.EmitAnyExprToMem(Init, 2935 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2936 Init->getType().getQualifiers(), 2937 /*IsInitializer=*/true); 2938 CtorCGF.FinishFunction(); 2939 Ctor = Fn; 2940 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2941 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2942 } else { 2943 Ctor = new llvm::GlobalVariable( 2944 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2945 llvm::GlobalValue::PrivateLinkage, 2946 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2947 ID = Ctor; 2948 } 2949 2950 // Register the information for the entry associated with the constructor. 
2951 Out.clear(); 2952 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2953 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2954 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2955 } 2956 if (VD->getType().isDestructedType() != QualType::DK_none) { 2957 llvm::Constant *Dtor; 2958 llvm::Constant *ID; 2959 if (CGM.getLangOpts().OpenMPIsDevice) { 2960 // Generate function that emits destructor call for the threadprivate 2961 // copy of the variable VD 2962 CodeGenFunction DtorCGF(CGM); 2963 2964 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2965 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2966 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2967 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2968 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2969 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2970 FunctionArgList(), Loc, Loc); 2971 // Create a scope with an artificial location for the body of this 2972 // function. 2973 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2974 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2975 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2976 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2977 DtorCGF.FinishFunction(); 2978 Dtor = Fn; 2979 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2980 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2981 } else { 2982 Dtor = new llvm::GlobalVariable( 2983 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2984 llvm::GlobalValue::PrivateLinkage, 2985 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2986 ID = Dtor; 2987 } 2988 // Register the information for the entry associated with the destructor. 
2989 Out.clear(); 2990 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2991 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2992 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2993 } 2994 return CGM.getLangOpts().OpenMPIsDevice; 2995 } 2996 2997 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2998 QualType VarType, 2999 StringRef Name) { 3000 std::string Suffix = getName({"artificial", ""}); 3001 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 3002 llvm::Value *GAddr = 3003 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 3004 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 3005 CGM.getTarget().isTLSSupported()) { 3006 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 3007 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 3008 } 3009 std::string CacheSuffix = getName({"cache", ""}); 3010 llvm::Value *Args[] = { 3011 emitUpdateLocation(CGF, SourceLocation()), 3012 getThreadID(CGF, SourceLocation()), 3013 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 3014 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 3015 /*isSigned=*/false), 3016 getOrCreateInternalVariable( 3017 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 3018 return Address( 3019 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3020 CGF.EmitRuntimeCall( 3021 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 3022 VarLVType->getPointerTo(/*AddrSpace=*/0)), 3023 CGM.getContext().getTypeAlignInChars(VarType)); 3024 } 3025 3026 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 3027 const RegionCodeGenTy &ThenGen, 3028 const RegionCodeGenTy &ElseGen) { 3029 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 3030 3031 // If the condition constant folds and can be elided, try to avoid emitting 3032 // the condition and the dead arm of the 
if/else. 3033 bool CondConstant; 3034 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 3035 if (CondConstant) 3036 ThenGen(CGF); 3037 else 3038 ElseGen(CGF); 3039 return; 3040 } 3041 3042 // Otherwise, the condition did not fold, or we couldn't elide it. Just 3043 // emit the conditional branch. 3044 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3045 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 3046 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 3047 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 3048 3049 // Emit the 'then' code. 3050 CGF.EmitBlock(ThenBlock); 3051 ThenGen(CGF); 3052 CGF.EmitBranch(ContBlock); 3053 // Emit the 'else' code if present. 3054 // There is no need to emit line number for unconditional branch. 3055 (void)ApplyDebugLocation::CreateEmpty(CGF); 3056 CGF.EmitBlock(ElseBlock); 3057 ElseGen(CGF); 3058 // There is no need to emit line number for unconditional branch. 3059 (void)ApplyDebugLocation::CreateEmpty(CGF); 3060 CGF.EmitBranch(ContBlock); 3061 // Emit the continuation block for code after the if. 
3062 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 3063 } 3064 3065 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 3066 llvm::Function *OutlinedFn, 3067 ArrayRef<llvm::Value *> CapturedVars, 3068 const Expr *IfCond) { 3069 if (!CGF.HaveInsertPoint()) 3070 return; 3071 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 3072 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 3073 PrePostActionTy &) { 3074 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 3075 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3076 llvm::Value *Args[] = { 3077 RTLoc, 3078 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3079 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3080 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3081 RealArgs.append(std::begin(Args), std::end(Args)); 3082 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3083 3084 llvm::FunctionCallee RTLFn = 3085 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3086 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3087 }; 3088 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3089 PrePostActionTy &) { 3090 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3091 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3092 // Build calls: 3093 // __kmpc_serialized_parallel(&Loc, GTid); 3094 llvm::Value *Args[] = {RTLoc, ThreadID}; 3095 CGF.EmitRuntimeCall( 3096 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3097 3098 // OutlinedFn(>id, &zero_bound, CapturedStruct); 3099 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 3100 Address ZeroAddrBound = 3101 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3102 /*Name=*/".bound.zero.addr"); 3103 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 3104 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3105 // ThreadId for serialized parallels is 0. 
3106 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 3107 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 3108 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3109 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3110 3111 // __kmpc_end_serialized_parallel(&Loc, GTid); 3112 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3113 CGF.EmitRuntimeCall( 3114 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3115 EndArgs); 3116 }; 3117 if (IfCond) { 3118 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 3119 } else { 3120 RegionCodeGenTy ThenRCG(ThenGen); 3121 ThenRCG(CGF); 3122 } 3123 } 3124 3125 // If we're inside an (outlined) parallel region, use the region info's 3126 // thread-ID variable (it is passed in a first argument of the outlined function 3127 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3128 // regular serial code region, get thread ID by calling kmp_int32 3129 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3130 // return the address of that temp. 
3131 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3132 SourceLocation Loc) { 3133 if (auto *OMPRegionInfo = 3134 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3135 if (OMPRegionInfo->getThreadIDVariable()) 3136 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 3137 3138 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3139 QualType Int32Ty = 3140 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3141 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3142 CGF.EmitStoreOfScalar(ThreadID, 3143 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3144 3145 return ThreadIDTemp; 3146 } 3147 3148 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3149 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3150 SmallString<256> Buffer; 3151 llvm::raw_svector_ostream Out(Buffer); 3152 Out << Name; 3153 StringRef RuntimeName = Out.str(); 3154 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3155 if (Elem.second) { 3156 assert(Elem.second->getType()->getPointerElementType() == Ty && 3157 "OMP internal variable has different type than requested"); 3158 return &*Elem.second; 3159 } 3160 3161 return Elem.second = new llvm::GlobalVariable( 3162 CGM.getModule(), Ty, /*IsConstant*/ false, 3163 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3164 Elem.first(), /*InsertBefore=*/nullptr, 3165 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3166 } 3167 3168 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3169 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3170 std::string Name = getName({Prefix, "var"}); 3171 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3172 } 3173 3174 namespace { 3175 /// Common pre(post)-action for different OpenMP constructs. 
/// Enter() emits a call to the "enter" callee; when \c Conditional is set it
/// also branches on that call's result so the region body only runs when the
/// result is non-null. Exit() emits a call to the "exit" callee. Done() closes
/// the conditional branch opened by Enter().
///
/// The argument lists are held as ArrayRef (non-owning), so the arrays passed
/// to the constructor must outlive the action.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Continuation block; only assigned by Enter() when Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emit the enter call and, if conditional, open the "omp_if.then" block.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Branch to and emit the continuation block. In the code visible here it
  /// is only called for actions constructed with Conditional == true.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emit the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region: the body produced by \p CriticalOpGen is
/// bracketed by the critical enter/exit runtime calls.
///
/// \param CriticalName Name used to look up the region's lock variable.
/// \param Hint         Optional hint expression; when present the
///                     "_with_hint" entry point is used and the hint value is
///                     appended to the enter arguments.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  // Args is used as-is for the exit call; EnterArgs is a copy that may get
  // the hint appended.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint is converted to IntPtrTy as an unsigned value.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: the body produced by \p MasterOpGen runs only when
/// the __kmpc_master call returns a non-null value.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional opened by Action.Enter().
  Action.Done(CGF);
}

/// Emit a 'taskyield': through the OpenMPIRBuilder when one is available,
/// otherwise as a direct runtime call. Afterwards, if the current captured
/// statement info is an OpenMP region, its emitUntiedSwitch hook is invoked.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region: the body produced by \p TaskgroupOpGen is
/// bracketed by the taskgroup begin/end runtime calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// Loads element \p Index of \p Array and returns it as an Address that has
/// the declared alignment of \p Var and the memory type of \p Var's type.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Build the internal-linkage helper "void copy_func(void *LHSArg,
/// void *RHSArg)" used for copyprivate.
///
/// Both arguments are reinterpreted as \p ArgsType (a pointer to an array of
/// pointers). For every I, element I of the LHS array is the destination and
/// element I of the RHS array is the source of a copy performed with
/// AssignmentOps[I]. The number of copies is AssignmentOps.size().
///
/// \return The generated function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The helper body is emitted with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copied type is taken from the copyprivate variable itself.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region, plus the copyprivate broadcast when
/// \p CopyprivateVars is non-empty.
///
/// \param SingleOpGen     Generator for the region body.
/// \param CopyprivateVars Variables listed in copyprivate clauses.
/// \param SrcExprs, DstExprs, AssignmentOps Per-variable helper expressions;
///        all four lists are asserted to have the same length.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // what gates the copyprivate code below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), i.e. still inside the conditional block.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): emitCopyprivateCopyFunction declares its parameters as
    // (CopyprivateVars, DestExprs, SrcExprs, AssignmentOps) while SrcExprs is
    // passed ahead of DstExprs here - confirm the naming against the callers.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    // This Args intentionally shadows the outer __kmpc_single argument array.
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// Emit an 'ordered' region. When \p IsThreads is true the body produced by
/// \p OrderedOpGen is bracketed by the ordered begin/end runtime calls;
/// otherwise the body is emitted inline without them.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Return the location flags used for a barrier that belongs to a directive
/// of kind \p Kind: dedicated flags for 'for', 'sections', 'single' and an
/// explicit 'barrier', and OMP_IDENT_BARRIER_IMPL for every other kind.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Choose the default schedule for loop directive \p S. \p ScheduleKind and
/// \p ChunkExpr are only written when \p S has an 'ordered' clause with a
/// non-zero loop count; otherwise both are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier for a directive of kind \p Kind.
///
/// \param EmitChecks      When the cancellable barrier is used, also emit the
///                        check of its result and the branch to the cancel
///                        destination.
/// \param ForceSimpleCall Use the plain barrier call even inside a region
///                        that has cancellation.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    // Note: this path is taken before the HaveInsertPoint() check below.
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
///
/// \p Chunked only affects the static kind. An unknown schedule kind is
/// mapped like non-chunked static and must not be chunked.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
/// \p ScheduleKind is not consulted; the result depends on \p Chunked only.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// Return true if the schedule clause maps to the non-ordered, non-chunked
/// static runtime schedule.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// Return true if the dist_schedule clause maps to the non-chunked static
/// distribute schedule.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// Return true if the schedule clause maps to the non-ordered, chunked static
/// runtime schedule.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// Return true if the dist_schedule clause maps to the chunked static
/// distribute schedule.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// Return true if the schedule clause maps to anything other than the
/// non-chunked static runtime schedule (queried as non-chunked, non-ordered).
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Combine \p Schedule with the monotonic/nonmonotonic modifier bits derived
/// from the two schedule clause modifiers \p M1 and \p M2.
///
/// M2 is processed after M1, so a monotonicity modifier in M2 overrides one
/// set by M1. A 'simd' modifier turns OMP_sch_static_chunked into
/// OMP_sch_static_balanced_chunked.
///
/// \return Schedule | Modifier as an int.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  // Only applied for OpenMP >= 5.0 and when no explicit modifier was given;
  // for the static schedules listed below no modifier bit is added.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

/// Emit the dispatch-init runtime call for a loop.
///
/// \param ScheduleKind   Schedule clause kind and its two modifiers.
/// \param IVSize         Bit width of the iteration variable; selects the
///                       runtime entry point and the width of the constants.
/// \param IVSigned       Signedness of the iteration variable.
/// \param Ordered        Whether the ordered variants of the schedule are used.
/// \param DispatchValues Lower bound, upper bound and optional chunk values.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Unless ordered, static schedules are not expected on this path.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper that emits the static-init runtime call through
/// \p ForStaticInitFunction.
///
/// \p Schedule must be one of the static schedules and \p Values must not be
/// ordered (both asserted). A missing chunk is only accepted for the
/// non-chunked schedules and is replaced by the constant 1.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init call for a worksharing directive of kind \p DKind.
/// The location is flagged as loop work for loop directives and as sections
/// work otherwise.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // Installed after the location and thread id are emitted, so it applies to
  // the static-init call below.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init call for a 'distribute' loop. The location is flagged
/// as distribute work and no schedule modifiers are passed.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the static-fini call that ends a statically scheduled region. The
/// location flag is chosen from \p DKind: distribute, loop, or sections.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// Emit the dispatch-fini call that ends an iteration of an ordered loop. The
/// entry point is selected by \p IVSize and \p IVSigned.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit the dispatch-next call, passing the addresses \p IL, \p LB, \p UB and
/// \p ST, and return its result converted from a signed 32-bit integer to
/// bool.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit the runtime call for a 'num_threads' clause. \p NumThreads is cast to
/// a signed 32-bit integer before being passed.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// Emit the runtime call for a 'proc_bind' clause. \p ProcBind must not be
/// OMP_PROC_BIND_unknown (asserted); its numeric value is passed as an int
/// constant.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// Emit a 'flush': through the OpenMPIRBuilder when one is available,
/// otherwise as a direct runtime call. The expression list (unnamed
/// parameter) and \p AO are not used by this implementation.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// Return true when neither target-region entries nor device global variable
/// entries have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
///
/// Stores a placeholder entry (null address and ID) with the given \p Order
/// under the (DeviceID, FileID, ParentName, LineNum) key and bumps the entry
/// counter. Only valid for device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register the address, ID and flags of a target region entry.
///
/// For device code generation the entry must already have been initialized
/// and not yet registered; otherwise an error diagnostic is reported and
/// nothing is changed. For host code generation a new entry is created with
/// the next order number.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true if an entry exists for the given key and has neither an
/// address nor an ID yet. An entry that already carries either one is
/// reported as absent.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

/// Invoke \p Action once for every recorded target region entry, passing the
/// device ID, file ID, parent name, line number and the entry itself.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Record a placeholder entry for the device global variable \p Name with
/// the given \p Order and \p Flags, and bump the entry counter. An existing
/// entry with the same name is left as it is (try_emplace), but the counter
/// is still incremented. Only valid for device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
OffloadEntriesDeviceGlobalVar.try_emplace( 4022 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 4023 ++OffloadingEntriesNum; 4024 } 4025 } 4026 4027 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4028 actOnDeviceGlobalVarEntriesInfo( 4029 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 4030 // Scan all target region entries and perform the provided action. 4031 for (const auto &E : OffloadEntriesDeviceGlobalVar) 4032 Action(E.getKey(), E.getValue()); 4033 } 4034 4035 void CGOpenMPRuntime::createOffloadEntry( 4036 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4037 llvm::GlobalValue::LinkageTypes Linkage) { 4038 StringRef Name = Addr->getName(); 4039 llvm::Module &M = CGM.getModule(); 4040 llvm::LLVMContext &C = M.getContext(); 4041 4042 // Create constant string with the name. 4043 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4044 4045 std::string StringName = getName({"omp_offloading", "entry_name"}); 4046 auto *Str = new llvm::GlobalVariable( 4047 M, StrPtrInit->getType(), /*isConstant=*/true, 4048 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4049 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4050 4051 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4052 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4053 llvm::ConstantInt::get(CGM.SizeTy, Size), 4054 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4055 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4056 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4057 llvm::GlobalVariable *Entry = createGlobalStruct( 4058 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4059 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4060 4061 // The entry has to be created in the section the linker expects it to be. 
4062 Entry->setSection("omp_offloading_entries"); 4063 } 4064 4065 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4066 // Emit the offloading entries and metadata so that the device codegen side 4067 // can easily figure out what to emit. The produced metadata looks like 4068 // this: 4069 // 4070 // !omp_offload.info = !{!1, ...} 4071 // 4072 // Right now we only generate metadata for function that contain target 4073 // regions. 4074 4075 // If we are in simd mode or there are no entries, we don't need to do 4076 // anything. 4077 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 4078 return; 4079 4080 llvm::Module &M = CGM.getModule(); 4081 llvm::LLVMContext &C = M.getContext(); 4082 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 4083 SourceLocation, StringRef>, 4084 16> 4085 OrderedEntries(OffloadEntriesInfoManager.size()); 4086 llvm::SmallVector<StringRef, 16> ParentFunctions( 4087 OffloadEntriesInfoManager.size()); 4088 4089 // Auxiliary methods to create metadata values and strings. 4090 auto &&GetMDInt = [this](unsigned V) { 4091 return llvm::ConstantAsMetadata::get( 4092 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4093 }; 4094 4095 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4096 4097 // Create the offloading info metadata node. 4098 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4099 4100 // Create function that emits metadata for each target region entry; 4101 auto &&TargetRegionMetadataEmitter = 4102 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 4103 &GetMDString]( 4104 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4105 unsigned Line, 4106 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4107 // Generate metadata for target regions. Each entry of this metadata 4108 // contains: 4109 // - Entry 0 -> Kind of this type of metadata (0). 
4110 // - Entry 1 -> Device ID of the file where the entry was identified. 4111 // - Entry 2 -> File ID of the file where the entry was identified. 4112 // - Entry 3 -> Mangled name of the function where the entry was 4113 // identified. 4114 // - Entry 4 -> Line in the file where the entry was identified. 4115 // - Entry 5 -> Order the entry was created. 4116 // The first element of the metadata node is the kind. 4117 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4118 GetMDInt(FileID), GetMDString(ParentName), 4119 GetMDInt(Line), GetMDInt(E.getOrder())}; 4120 4121 SourceLocation Loc; 4122 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 4123 E = CGM.getContext().getSourceManager().fileinfo_end(); 4124 I != E; ++I) { 4125 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 4126 I->getFirst()->getUniqueID().getFile() == FileID) { 4127 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 4128 I->getFirst(), Line, 1); 4129 break; 4130 } 4131 } 4132 // Save this entry in the right position of the ordered entries array. 4133 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 4134 ParentFunctions[E.getOrder()] = ParentName; 4135 4136 // Add metadata to the named metadata node. 4137 MD->addOperand(llvm::MDNode::get(C, Ops)); 4138 }; 4139 4140 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4141 TargetRegionMetadataEmitter); 4142 4143 // Create function that emits metadata for each device global variable entry; 4144 auto &&DeviceGlobalVarMetadataEmitter = 4145 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4146 MD](StringRef MangledName, 4147 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4148 &E) { 4149 // Generate metadata for global variables. Each entry of this metadata 4150 // contains: 4151 // - Entry 0 -> Kind of this type of metadata (1). 4152 // - Entry 1 -> Mangled name of the variable. 4153 // - Entry 2 -> Declare target kind. 
4154 // - Entry 3 -> Order the entry was created. 4155 // The first element of the metadata node is the kind. 4156 llvm::Metadata *Ops[] = { 4157 GetMDInt(E.getKind()), GetMDString(MangledName), 4158 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4159 4160 // Save this entry in the right position of the ordered entries array. 4161 OrderedEntries[E.getOrder()] = 4162 std::make_tuple(&E, SourceLocation(), MangledName); 4163 4164 // Add metadata to the named metadata node. 4165 MD->addOperand(llvm::MDNode::get(C, Ops)); 4166 }; 4167 4168 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4169 DeviceGlobalVarMetadataEmitter); 4170 4171 for (const auto &E : OrderedEntries) { 4172 assert(std::get<0>(E) && "All ordered entries must exist!"); 4173 if (const auto *CE = 4174 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4175 std::get<0>(E))) { 4176 if (!CE->getID() || !CE->getAddress()) { 4177 // Do not blame the entry if the parent funtion is not emitted. 4178 StringRef FnName = ParentFunctions[CE->getOrder()]; 4179 if (!CGM.GetGlobalValue(FnName)) 4180 continue; 4181 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4182 DiagnosticsEngine::Error, 4183 "Offloading entry for target region in %0 is incorrect: either the " 4184 "address or the ID is invalid."); 4185 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 4186 continue; 4187 } 4188 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4189 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4190 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 4191 OffloadEntryInfoDeviceGlobalVar>( 4192 std::get<0>(E))) { 4193 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4194 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4195 CE->getFlags()); 4196 switch (Flags) { 4197 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4198 if (CGM.getLangOpts().OpenMPIsDevice && 4199 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4200 continue; 4201 if (!CE->getAddress()) { 4202 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4203 DiagnosticsEngine::Error, "Offloading entry for declare target " 4204 "variable %0 is incorrect: the " 4205 "address is invalid."); 4206 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 4207 continue; 4208 } 4209 // The vaiable has no definition - no need to add the entry. 4210 if (CE->getVarSize().isZero()) 4211 continue; 4212 break; 4213 } 4214 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4215 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4216 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4217 "Declaret target link address is set."); 4218 if (CGM.getLangOpts().OpenMPIsDevice) 4219 continue; 4220 if (!CE->getAddress()) { 4221 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4222 DiagnosticsEngine::Error, 4223 "Offloading entry for declare target variable is incorrect: the " 4224 "address is invalid."); 4225 CGM.getDiags().Report(DiagID); 4226 continue; 4227 } 4228 break; 4229 } 4230 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4231 CE->getVarSize().getQuantity(), Flags, 4232 CE->getLinkage()); 4233 } else { 4234 llvm_unreachable("Unsupported entry kind."); 4235 } 4236 } 4237 } 4238 4239 /// Loads all the offload entries information from the host IR 4240 /// metadata. 4241 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4242 // If we are in target mode, load the metadata from the host IR. This code has 4243 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4244 4245 if (!CGM.getLangOpts().OpenMPIsDevice) 4246 return; 4247 4248 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4249 return; 4250 4251 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4252 if (auto EC = Buf.getError()) { 4253 CGM.getDiags().Report(diag::err_cannot_open_file) 4254 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4255 return; 4256 } 4257 4258 llvm::LLVMContext C; 4259 auto ME = expectedToErrorOrAndEmitErrors( 4260 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4261 4262 if (auto EC = ME.getError()) { 4263 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4264 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4265 CGM.getDiags().Report(DiagID) 4266 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4267 return; 4268 } 4269 4270 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4271 if (!MD) 4272 return; 4273 4274 for (llvm::MDNode *MN : MD->operands()) { 4275 auto &&GetMDInt = [MN](unsigned Idx) { 4276 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4277 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4278 }; 4279 4280 auto &&GetMDString = [MN](unsigned Idx) { 4281 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4282 return V->getString(); 4283 }; 4284 4285 switch (GetMDInt(0)) { 4286 default: 4287 llvm_unreachable("Unexpected metadata!"); 4288 break; 4289 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4290 OffloadingEntryInfoTargetRegion: 4291 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4292 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4293 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4294 /*Order=*/GetMDInt(5)); 4295 break; 4296 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4297 OffloadingEntryInfoDeviceGlobalVar: 4298 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4299 /*MangledName=*/GetMDString(1), 4300 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4301 /*Flags=*/GetMDInt(2)), 4302 /*Order=*/GetMDInt(3)); 4303 break; 4304 } 4305 } 4306 } 4307 4308 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4309 if (!KmpRoutineEntryPtrTy) { 4310 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4311 ASTContext &C = CGM.getContext(); 4312 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4313 FunctionProtoType::ExtProtoInfo EPI; 4314 KmpRoutineEntryPtrQTy = C.getPointerType( 4315 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4316 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4317 } 4318 } 4319 4320 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4321 // Make sure the type of the entry is already created. This is the type we 4322 // have to create: 4323 // struct __tgt_offload_entry{ 4324 // void *addr; // Pointer to the offload entry info. 4325 // // (function or global) 4326 // char *name; // Name of the function or global. 4327 // size_t size; // Size of the entry info (0 if it a function). 4328 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4329 // int32_t reserved; // Reserved, to use by the runtime library. 
4330 // }; 4331 if (TgtOffloadEntryQTy.isNull()) { 4332 ASTContext &C = CGM.getContext(); 4333 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4334 RD->startDefinition(); 4335 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4336 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4337 addFieldToRecordDecl(C, RD, C.getSizeType()); 4338 addFieldToRecordDecl( 4339 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4340 addFieldToRecordDecl( 4341 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4342 RD->completeDefinition(); 4343 RD->addAttr(PackedAttr::CreateImplicit(C)); 4344 TgtOffloadEntryQTy = C.getRecordType(RD); 4345 } 4346 return TgtOffloadEntryQTy; 4347 } 4348 4349 namespace { 4350 struct PrivateHelpersTy { 4351 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 4352 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 4353 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 4354 PrivateElemInit(PrivateElemInit) {} 4355 const Expr *OriginalRef = nullptr; 4356 const VarDecl *Original = nullptr; 4357 const VarDecl *PrivateCopy = nullptr; 4358 const VarDecl *PrivateElemInit = nullptr; 4359 }; 4360 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4361 } // anonymous namespace 4362 4363 static RecordDecl * 4364 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4365 if (!Privates.empty()) { 4366 ASTContext &C = CGM.getContext(); 4367 // Build struct .kmp_privates_t. 
{ 4368 // /* private vars */ 4369 // }; 4370 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4371 RD->startDefinition(); 4372 for (const auto &Pair : Privates) { 4373 const VarDecl *VD = Pair.second.Original; 4374 QualType Type = VD->getType().getNonReferenceType(); 4375 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4376 if (VD->hasAttrs()) { 4377 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4378 E(VD->getAttrs().end()); 4379 I != E; ++I) 4380 FD->addAttr(*I); 4381 } 4382 } 4383 RD->completeDefinition(); 4384 return RD; 4385 } 4386 return nullptr; 4387 } 4388 4389 static RecordDecl * 4390 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4391 QualType KmpInt32Ty, 4392 QualType KmpRoutineEntryPointerQTy) { 4393 ASTContext &C = CGM.getContext(); 4394 // Build struct kmp_task_t { 4395 // void * shareds; 4396 // kmp_routine_entry_t routine; 4397 // kmp_int32 part_id; 4398 // kmp_cmplrdata_t data1; 4399 // kmp_cmplrdata_t data2; 4400 // For taskloops additional fields: 4401 // kmp_uint64 lb; 4402 // kmp_uint64 ub; 4403 // kmp_int64 st; 4404 // kmp_int32 liter; 4405 // void * reductions; 4406 // }; 4407 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4408 UD->startDefinition(); 4409 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4410 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4411 UD->completeDefinition(); 4412 QualType KmpCmplrdataTy = C.getRecordType(UD); 4413 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4414 RD->startDefinition(); 4415 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4416 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4417 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4418 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4419 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4420 if (isOpenMPTaskLoopDirective(Kind)) { 4421 QualType KmpUInt64Ty = 4422 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4423 QualType KmpInt64Ty = 4424 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4425 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4426 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4427 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4428 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4429 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4430 } 4431 RD->completeDefinition(); 4432 return RD; 4433 } 4434 4435 static RecordDecl * 4436 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4437 ArrayRef<PrivateDataTy> Privates) { 4438 ASTContext &C = CGM.getContext(); 4439 // Build struct kmp_task_t_with_privates { 4440 // kmp_task_t task_data; 4441 // .kmp_privates_t. privates; 4442 // }; 4443 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4444 RD->startDefinition(); 4445 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4446 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4447 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4448 RD->completeDefinition(); 4449 return RD; 4450 } 4451 4452 /// Emit a proxy function which accepts kmp_task_t as the second 4453 /// argument. 
4454 /// \code 4455 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4456 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4457 /// For taskloops: 4458 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4459 /// tt->reductions, tt->shareds); 4460 /// return 0; 4461 /// } 4462 /// \endcode 4463 static llvm::Function * 4464 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4465 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4466 QualType KmpTaskTWithPrivatesPtrQTy, 4467 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4468 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4469 llvm::Value *TaskPrivatesMap) { 4470 ASTContext &C = CGM.getContext(); 4471 FunctionArgList Args; 4472 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4473 ImplicitParamDecl::Other); 4474 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4475 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4476 ImplicitParamDecl::Other); 4477 Args.push_back(&GtidArg); 4478 Args.push_back(&TaskTypeArg); 4479 const auto &TaskEntryFnInfo = 4480 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4481 llvm::FunctionType *TaskEntryTy = 4482 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4483 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4484 auto *TaskEntry = llvm::Function::Create( 4485 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4486 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4487 TaskEntry->setDoesNotRecurse(); 4488 CodeGenFunction CGF(CGM); 4489 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4490 Loc, Loc); 4491 4492 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4493 // tt, 4494 // For taskloops: 4495 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4496 // 
tt->task_data.shareds); 4497 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4498 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4499 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4500 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4501 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4502 const auto *KmpTaskTWithPrivatesQTyRD = 4503 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4504 LValue Base = 4505 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4506 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4507 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4508 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4509 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 4510 4511 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4512 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4513 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4514 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4515 CGF.ConvertTypeForMem(SharedsPtrTy)); 4516 4517 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4518 llvm::Value *PrivatesParam; 4519 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4520 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4521 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4522 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 4523 } else { 4524 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4525 } 4526 4527 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4528 TaskPrivatesMap, 4529 CGF.Builder 4530 .CreatePointerBitCastOrAddrSpaceCast( 4531 TDBase.getAddress(CGF), CGF.VoidPtrTy) 4532 .getPointer()}; 4533 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4534 std::end(CommonArgs)); 4535 if (isOpenMPTaskLoopDirective(Kind)) { 4536 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4537 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4538 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4539 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4540 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4541 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4542 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4543 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4544 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4545 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4546 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4547 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4548 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4549 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4550 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4551 CallArgs.push_back(LBParam); 4552 CallArgs.push_back(UBParam); 4553 CallArgs.push_back(StParam); 4554 CallArgs.push_back(LIParam); 4555 CallArgs.push_back(RParam); 4556 } 4557 CallArgs.push_back(SharedsParam); 4558 4559 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4560 CallArgs); 4561 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4562 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4563 CGF.FinishFunction(); 4564 return TaskEntry; 4565 } 4566 4567 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4568 SourceLocation Loc, 4569 QualType KmpInt32Ty, 4570 QualType KmpTaskTWithPrivatesPtrQTy, 4571 QualType KmpTaskTWithPrivatesQTy) { 4572 ASTContext &C = CGM.getContext(); 4573 FunctionArgList Args; 4574 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4575 ImplicitParamDecl::Other); 4576 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4577 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4578 
ImplicitParamDecl::Other); 4579 Args.push_back(&GtidArg); 4580 Args.push_back(&TaskTypeArg); 4581 const auto &DestructorFnInfo = 4582 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4583 llvm::FunctionType *DestructorFnTy = 4584 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4585 std::string Name = 4586 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4587 auto *DestructorFn = 4588 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4589 Name, &CGM.getModule()); 4590 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4591 DestructorFnInfo); 4592 DestructorFn->setDoesNotRecurse(); 4593 CodeGenFunction CGF(CGM); 4594 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4595 Args, Loc, Loc); 4596 4597 LValue Base = CGF.EmitLoadOfPointerLValue( 4598 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4599 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4600 const auto *KmpTaskTWithPrivatesQTyRD = 4601 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4602 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4603 Base = CGF.EmitLValueForField(Base, *FI); 4604 for (const auto *Field : 4605 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4606 if (QualType::DestructionKind DtorKind = 4607 Field->getType().isDestructedType()) { 4608 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4609 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 4610 } 4611 } 4612 CGF.FinishFunction(); 4613 return DestructorFn; 4614 } 4615 4616 /// Emit a privates mapping function for correct handling of private and 4617 /// firstprivate variables. 4618 /// \code 4619 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4620 /// **noalias priv1,..., <tyn> **noalias privn) { 4621 /// *priv1 = &.privates.priv1; 4622 /// ...; 4623 /// *privn = &.privates.privn; 4624 /// } 4625 /// \endcode 4626 static llvm::Value * 4627 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4628 ArrayRef<const Expr *> PrivateVars, 4629 ArrayRef<const Expr *> FirstprivateVars, 4630 ArrayRef<const Expr *> LastprivateVars, 4631 QualType PrivatesQTy, 4632 ArrayRef<PrivateDataTy> Privates) { 4633 ASTContext &C = CGM.getContext(); 4634 FunctionArgList Args; 4635 ImplicitParamDecl TaskPrivatesArg( 4636 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4637 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4638 ImplicitParamDecl::Other); 4639 Args.push_back(&TaskPrivatesArg); 4640 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4641 unsigned Counter = 1; 4642 for (const Expr *E : PrivateVars) { 4643 Args.push_back(ImplicitParamDecl::Create( 4644 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4645 C.getPointerType(C.getPointerType(E->getType())) 4646 .withConst() 4647 .withRestrict(), 4648 ImplicitParamDecl::Other)); 4649 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4650 PrivateVarsPos[VD] = Counter; 4651 ++Counter; 4652 } 4653 for (const Expr *E : FirstprivateVars) { 4654 Args.push_back(ImplicitParamDecl::Create( 4655 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4656 C.getPointerType(C.getPointerType(E->getType())) 4657 .withConst() 4658 .withRestrict(), 4659 ImplicitParamDecl::Other)); 4660 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4661 PrivateVarsPos[VD] = Counter; 4662 ++Counter; 4663 } 4664 for (const Expr *E : LastprivateVars) { 4665 Args.push_back(ImplicitParamDecl::Create( 4666 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4667 C.getPointerType(C.getPointerType(E->getType())) 4668 .withConst() 4669 .withRestrict(), 4670 ImplicitParamDecl::Other)); 4671 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4672 
PrivateVarsPos[VD] = Counter; 4673 ++Counter; 4674 } 4675 const auto &TaskPrivatesMapFnInfo = 4676 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4677 llvm::FunctionType *TaskPrivatesMapTy = 4678 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4679 std::string Name = 4680 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4681 auto *TaskPrivatesMap = llvm::Function::Create( 4682 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4683 &CGM.getModule()); 4684 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4685 TaskPrivatesMapFnInfo); 4686 if (CGM.getLangOpts().Optimize) { 4687 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4688 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4689 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4690 } 4691 CodeGenFunction CGF(CGM); 4692 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4693 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4694 4695 // *privi = &.privates.privi; 4696 LValue Base = CGF.EmitLoadOfPointerLValue( 4697 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4698 TaskPrivatesArg.getType()->castAs<PointerType>()); 4699 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4700 Counter = 0; 4701 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4702 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4703 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4704 LValue RefLVal = 4705 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4706 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4707 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 4708 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 4709 ++Counter; 4710 } 4711 CGF.FinishFunction(); 4712 return TaskPrivatesMap; 4713 } 4714 4715 /// Emit initialization for private variables in task-based directives. 
///
/// For every entry of \p Privates whose private copy has an initializer, emits
/// the initialization of the matching field of the privates record (the second
/// field of \p KmpTaskTWithPrivatesQTyRD, accessed through \p TDBase).
/// \param KmpTaskSharedsPtr Address of the task shareds; only used when valid
/// for target tasks, or when \p ForDup is set and there are firstprivates.
/// \param ForDup When true, only initializers that are non-trivial
/// CXXConstructExprs are emitted and firstprivate sources are read from the
/// shareds record instead of from the original variable reference.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The second field of the task-with-privates record is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    // View the shareds pointer as an lvalue of the shareds record type.
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Re-point FI at the first field of the privates record; it is advanced in
  // lock-step with the entries of Privates below.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In the dup function only non-trivial constructor calls are re-emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks an entry that is initialized from
      // the original (shared) value.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source from the shareds record and rebuild the lvalue
          // with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else {
          // Emit the reference to the original variable while a temporary
          // inlined OpenMP region (with an empty codegen callback) is active.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: map Elem to the shared value's address and
          // emit the initializer into the private field.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init variable: emit the initializer as is.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
4828 static bool checkInitIsRequired(CodeGenFunction &CGF, 4829 ArrayRef<PrivateDataTy> Privates) { 4830 bool InitRequired = false; 4831 for (const PrivateDataTy &Pair : Privates) { 4832 const VarDecl *VD = Pair.second.PrivateCopy; 4833 const Expr *Init = VD->getAnyInitializer(); 4834 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4835 !CGF.isTrivialInitializer(Init)); 4836 if (InitRequired) 4837 break; 4838 } 4839 return InitRequired; 4840 } 4841 4842 4843 /// Emit task_dup function (for initialization of 4844 /// private/firstprivate/lastprivate vars and last_iter flag) 4845 /// \code 4846 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4847 /// lastpriv) { 4848 /// // setup lastprivate flag 4849 /// task_dst->last = lastpriv; 4850 /// // could be constructor calls here... 4851 /// } 4852 /// \endcode 4853 static llvm::Value * 4854 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4855 const OMPExecutableDirective &D, 4856 QualType KmpTaskTWithPrivatesPtrQTy, 4857 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4858 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4859 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4860 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4861 ASTContext &C = CGM.getContext(); 4862 FunctionArgList Args; 4863 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4864 KmpTaskTWithPrivatesPtrQTy, 4865 ImplicitParamDecl::Other); 4866 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4867 KmpTaskTWithPrivatesPtrQTy, 4868 ImplicitParamDecl::Other); 4869 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4870 ImplicitParamDecl::Other); 4871 Args.push_back(&DstArg); 4872 Args.push_back(&SrcArg); 4873 Args.push_back(&LastprivArg); 4874 const auto &TaskDupFnInfo = 4875 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4876 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4877 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4878 auto *TaskDup = llvm::Function::Create( 4879 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4880 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4881 TaskDup->setDoesNotRecurse(); 4882 CodeGenFunction CGF(CGM); 4883 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4884 Loc); 4885 4886 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4887 CGF.GetAddrOfLocalVar(&DstArg), 4888 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4889 // task_dst->liter = lastpriv; 4890 if (WithLastIter) { 4891 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4892 LValue Base = CGF.EmitLValueForField( 4893 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4894 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4895 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4896 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4897 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4898 } 4899 4900 // Emit initial values for private copies (if any). 4901 assert(!Privates.empty()); 4902 Address KmpTaskSharedsPtr = Address::invalid(); 4903 if (!Data.FirstprivateVars.empty()) { 4904 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4905 CGF.GetAddrOfLocalVar(&SrcArg), 4906 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4907 LValue Base = CGF.EmitLValueForField( 4908 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4909 KmpTaskSharedsPtr = Address( 4910 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4911 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4912 KmpTaskTShareds)), 4913 Loc), 4914 CGF.getNaturalTypeAlignment(SharedsTy)); 4915 } 4916 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4917 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4918 CGF.FinishFunction(); 4919 return TaskDup; 4920 } 4921 4922 /// Checks if destructor function is required to be generated. 
/// \return true if cleanups are required, false otherwise.
/// Cleanups are required when at least one field of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) has a destructed type.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Allocates and initializes a task descriptor for directive \p D.
///
/// Collects the private/firstprivate/lastprivate copies, builds the
/// task-with-privates record, emits the privates mapping and proxy entry
/// functions, calls the runtime task allocation entry point, copies the
/// shareds, initializes the privates and fills in the destructors and
/// priority fields.
/// \return The allocated task, the proxy entry function, the typed task
/// pointer, the lvalue of the kmp_task_t part and its record declaration,
/// plus the task duplication function if one was emitted.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Decreasing alignment order; the stable sort keeps the relative order of
  // entries with equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  // The privates mapping function (or a null pointer when there are no
  // privates) is cast to the type of the fourth parameter of TaskFunction.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag is either selected at run time from Data.Final's value
  // (when its pointer part is set) or folded in as a constant from its
  // integer part.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // Directives with a nowait clause use the target task allocation entry
  // point, which takes the device id as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    // Note: this local Loc shadows the SourceLocation parameter inside this
    // scope.
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally get a duplication function when there are
    // lastprivates or when some private needs a non-trivial constructor call.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
/// Only in, out, inout and mutexinoutset are translated; any other kind is
/// treated as unreachable.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
/// \p FlagsTy is always (re)computed as an unsigned integer type as wide as
/// bool; the record gets three fields: an intptr-typed base address, a
/// size_t length and the flags.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Loads the kmp_depend_info array referenced by the depobj \p DepobjLVal.
/// \return A pair of the number of dependencies, read from the base_addr
/// field of the element at index -1, and an lvalue for the first
/// kmp_depend_info element.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // The depobj variable is treated as a 'void *' holding the address of the
  // dependency array.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to reach the header element.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[-1].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Emits the kmp_depend_info array for \p Dependencies.
///
/// The array is allocated in one of three ways: through __kmpc_alloc with one
/// extra leading element holding the element count (\p ForDepobj), as a
/// variable-sized local array when the list contains depobj dependencies, or
/// as a constant-sized temporary otherwise.
/// \return The number of elements and the address of the array cast to
/// 'void *'; a null value and an invalid address if \p Dependencies is empty.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependencies,
    bool ForDepobj, SourceLocation Loc) {
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.size();
  llvm::Value *NumOfElements = nullptr;
  if (NumDependencies) {
    QualType FlagsTy;
    getDependTypes(C, KmpDependInfoTy, FlagsTy);
    RecordDecl *KmpDependInfoRD =
        cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    unsigned NumDepobjDependecies = 0;
    SmallVector<std::pair<llvm::Value *, LValue>, 4> Depobjs;
    llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
    // Calculate number of depobj dependencies.
    for (const std::pair<OpenMPDependClauseKind, const Expr *> &Pair :
         Dependencies) {
      if (Pair.first != OMPC_DEPEND_depobj)
        continue;
      LValue DepobjLVal = CGF.EmitLValue(Pair.second);
      llvm::Value *NumDeps;
      LValue Base;
      std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
      NumOfDepobjElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumDeps);
      Depobjs.emplace_back(NumDeps, Base);
      ++NumDepobjDependecies;
    }

    QualType KmpDependInfoArrayTy;
    // Define type kmp_depend_info[<Dependencies.size()>];
    // For depobj reserve one extra element to store the number of elements.
    // It is required to handle depobj(x) update(in) construct.
    // kmp_depend_info[<Dependencies.size()>] deps;
    if (ForDepobj) {
      assert(NumDepobjDependecies == 0 &&
             "depobj dependency kind is not expected in depobj directive.");
      KmpDependInfoArrayTy = C.getConstantArrayType(
          KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      // Need to allocate on the dynamic memory.
      llvm::Value *ThreadID = getThreadID(CGF, Loc);
      // Use default allocator.
      llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      CharUnits Align = C.getTypeAlignInChars(KmpDependInfoArrayTy);
      CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
      llvm::Value *Size = CGF.CGM.getSize(Sz.alignTo(Align));
      llvm::Value *Args[] = {ThreadID, Size, Allocator};

      llvm::Value *Addr = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
      Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Addr, CGF.ConvertTypeForMem(KmpDependInfoArrayTy)->getPointerTo());
      DependenciesArray = Address(Addr, Align);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                             /*isSigned=*/false);
    } else if (NumDepobjDependecies > 0) {
      // Total = elements contributed by depobjs + number of plain
      // dependencies; only known at run time.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          NumOfDepobjElements,
          llvm::ConstantInt::get(CGM.IntPtrTy,
                                 NumDependencies - NumDepobjDependecies,
                                 /*isSigned=*/false));
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
      OpaqueValueExpr OVE(
          Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpDependInfoArrayTy =
          C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    } else {
      KmpDependInfoArrayTy = C.getConstantArrayType(
          KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      DependenciesArray =
          CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                             /*isSigned=*/false);
    }
    if (ForDepobj) {
      // Write number of elements in the first element of array for depobj.
      llvm::Value *NumVal =
          llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0),
          KmpDependInfoTy);
      // deps[0].base_addr = NumDependencies;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(NumVal, BaseAddrLVal);
    }
    // For depobj the real entries start after the header element.
    unsigned Pos = ForDepobj ? 1 : 0;
    for (unsigned I = 0; I < NumDependencies; ++I) {
      // depobj entries are copied in bulk after this loop.
      if (Dependencies[I].first == OMPC_DEPEND_depobj)
        continue;
      const Expr *E = Dependencies[I].second;
      const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
      llvm::Value *Addr;
      if (OASE) {
        const Expr *Base = OASE->getBase();
        Addr = CGF.EmitScalarExpr(Base);
      } else {
        Addr = CGF.EmitLValue(E).getPointer(CGF);
      }
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (OASE) {
        // Array shaping: pointee size multiplied by every dimension.
        Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
        for (const Expr *SE : OASE->getDimensions()) {
          llvm::Value *Sz = CGF.EmitScalarExpr(SE);
          Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                        CGF.getContext().getSizeType(),
                                        SE->getExprLoc());
          Size = CGF.Builder.CreateNUWMul(Size, Sz);
        }
      } else if (const auto *ASE =
                     dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: distance in bytes from the lower bound to one past
        // the upper-bound element.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
            UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
        llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base;
      // The variable-sized array is indexed with a plain GEP, the
      // constant-sized arrays with an array GEP.
      if (NumDepobjDependecies > 0) {
        Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateConstGEP(DependenciesArray, Pos),
            KmpDependInfoTy);
      } else {
        Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateConstArrayGEP(DependenciesArray, Pos),
            KmpDependInfoTy);
      }
      // deps[i].base_addr = &<Dependencies[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                            BaseAddrLVal);
      // deps[i].len = sizeof(<Dependencies[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependencies[i].first>;
      RTLDependenceKindTy DepKind =
          translateDependencyKind(Dependencies[I].first);
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
      ++Pos;
    }
    // Copy final depobj arrays.
    if (NumDepobjDependecies > 0) {
      llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
      // Start right after the plain dependencies written above.
      Address Addr = CGF.Builder.CreateConstGEP(DependenciesArray, Pos);
      for (const std::pair<llvm::Value *, LValue> &Pair : Depobjs) {
        llvm::Value *Size = CGF.Builder.CreateNUWMul(ElSize, Pair.first);
        CGF.Builder.CreateMemCpy(Addr, Pair.second.getAddress(CGF), Size);
        // Advance the destination by the number of elements just copied.
        Addr =
            Address(CGF.Builder.CreateGEP(
                        Addr.getElementType(), Addr.getPointer(), Pair.first),
                    DependenciesArray.getAlignment().alignmentOfArrayElement(
                        C.getTypeSizeInChars(KmpDependInfoTy)));
      }
      DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          DependenciesArray, CGF.VoidPtrTy);
    } else {
      // For depobj the returned pointer skips the header element.
      DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, ForDepobj ? 1 : 0),
          CGF.VoidPtrTy);
    }
  }
  return std::make_pair(NumOfElements, DependenciesArray);
}

/// Emits the release of the dependency array referenced by the depobj
/// \p DepobjLVal: the address one kmp_depend_info element before the stored
/// pointer is passed to __kmpc_free with a null allocator.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back one element to the start of the allocation.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
}

/// Emits a loop that stores the runtime dependence kind for \p NewDepKind
/// into the flags field of every kmp_depend_info element of the depobj
/// \p DepobjLVal.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the body block is entered by fall-through and the exit
  // condition is only tested after the body, so the body is emitted to run at
  // least once.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element: Begin on entry, the next element on the
  // back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Leave the loop once the next element is the end of the array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, /*ForDepobj=*/false, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
5544 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5545 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5546 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5547 // list is not empty 5548 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5549 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5550 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5551 llvm::Value *DepTaskArgs[7]; 5552 if (!Data.Dependences.empty()) { 5553 DepTaskArgs[0] = UpLoc; 5554 DepTaskArgs[1] = ThreadID; 5555 DepTaskArgs[2] = NewTask; 5556 DepTaskArgs[3] = NumOfElements; 5557 DepTaskArgs[4] = DependenciesArray.getPointer(); 5558 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5559 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5560 } 5561 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5562 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5563 if (!Data.Tied) { 5564 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5565 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5566 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5567 } 5568 if (!Data.Dependences.empty()) { 5569 CGF.EmitRuntimeCall( 5570 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5571 } else { 5572 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5573 TaskArgs); 5574 } 5575 // Check if parent region is untied and build return for untied task; 5576 if (auto *Region = 5577 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5578 Region->emitUntiedSwitch(CGF); 5579 }; 5580 5581 llvm::Value *DepWaitTaskArgs[6]; 5582 if (!Data.Dependences.empty()) { 5583 DepWaitTaskArgs[0] = UpLoc; 5584 DepWaitTaskArgs[1] = ThreadID; 5585 DepWaitTaskArgs[2] = NumOfElements; 5586 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5587 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5588 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5589 } 5590 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5591 &Data, &DepWaitTaskArgs, 5592 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5593 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5594 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5595 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5596 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5597 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5598 // is specified. 5599 if (!Data.Dependences.empty()) 5600 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5601 DepWaitTaskArgs); 5602 // Call proxy_task_entry(gtid, new_task); 5603 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5604 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5605 Action.Enter(CGF); 5606 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5607 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5608 OutlinedFnArgs); 5609 }; 5610 5611 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5612 // kmp_task_t *new_task); 5613 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5614 // kmp_task_t *new_task); 5615 RegionCodeGenTy RCG(CodeGen); 5616 CommonActionTy Action( 5617 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5618 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5619 RCG.setAction(Action); 5620 RCG(CGF); 5621 }; 5622 5623 if (IfCond) { 5624 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5625 } else { 5626 RegionCodeGenTy ThenRCG(ThenCodeGen); 5627 ThenRCG(CGF); 5628 } 5629 } 5630 5631 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5632 const OMPLoopDirective &D, 5633 llvm::Function *TaskFunction, 5634 QualType SharedsTy, Address Shareds, 5635 const Expr *IfCond, 5636 const OMPTaskDataTy &Data) { 5637 if 
(!CGF.HaveInsertPoint()) 5638 return; 5639 TaskResultTy Result = 5640 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5641 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5642 // libcall. 5643 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5644 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5645 // sched, kmp_uint64 grainsize, void *task_dup); 5646 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5647 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5648 llvm::Value *IfVal; 5649 if (IfCond) { 5650 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5651 /*isSigned=*/true); 5652 } else { 5653 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5654 } 5655 5656 LValue LBLVal = CGF.EmitLValueForField( 5657 Result.TDBase, 5658 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5659 const auto *LBVar = 5660 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5661 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5662 LBLVal.getQuals(), 5663 /*IsInitializer=*/true); 5664 LValue UBLVal = CGF.EmitLValueForField( 5665 Result.TDBase, 5666 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5667 const auto *UBVar = 5668 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5669 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5670 UBLVal.getQuals(), 5671 /*IsInitializer=*/true); 5672 LValue StLVal = CGF.EmitLValueForField( 5673 Result.TDBase, 5674 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5675 const auto *StVar = 5676 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5677 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5678 StLVal.getQuals(), 5679 /*IsInitializer=*/true); 5680 // Store reductions address. 
5681 LValue RedLVal = CGF.EmitLValueForField( 5682 Result.TDBase, 5683 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5684 if (Data.Reductions) { 5685 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5686 } else { 5687 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5688 CGF.getContext().VoidPtrTy); 5689 } 5690 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5691 llvm::Value *TaskArgs[] = { 5692 UpLoc, 5693 ThreadID, 5694 Result.NewTask, 5695 IfVal, 5696 LBLVal.getPointer(CGF), 5697 UBLVal.getPointer(CGF), 5698 CGF.EmitLoadOfScalar(StLVal, Loc), 5699 llvm::ConstantInt::getSigned( 5700 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5701 llvm::ConstantInt::getSigned( 5702 CGF.IntTy, Data.Schedule.getPointer() 5703 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5704 : NoSchedule), 5705 Data.Schedule.getPointer() 5706 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5707 /*isSigned=*/false) 5708 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5709 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5710 Result.TaskDupFn, CGF.VoidPtrTy) 5711 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5712 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 5713 } 5714 5715 /// Emit reduction operation for each element of array (required for 5716 /// array sections) LHS op = RHS. 5717 /// \param Type Type of array. 5718 /// \param LHSVar Variable on the left side of the reduction operation 5719 /// (references element of array in original variable). 5720 /// \param RHSVar Variable on the right side of the reduction operation 5721 /// (references element of array in original variable). 5722 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5723 /// RHSVar. 
5724 static void EmitOMPAggregateReduction( 5725 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5726 const VarDecl *RHSVar, 5727 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5728 const Expr *, const Expr *)> &RedOpGen, 5729 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5730 const Expr *UpExpr = nullptr) { 5731 // Perform element-by-element initialization. 5732 QualType ElementTy; 5733 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5734 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5735 5736 // Drill down to the base element type on both arrays. 5737 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5738 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5739 5740 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5741 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5742 // Cast from pointer to array type to pointer to single element. 5743 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5744 // The basic structure here is a while-do loop. 5745 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5746 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5747 llvm::Value *IsEmpty = 5748 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5749 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5750 5751 // Enter the loop body, making that address the current address. 
5752 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5753 CGF.EmitBlock(BodyBB); 5754 5755 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5756 5757 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5758 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5759 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5760 Address RHSElementCurrent = 5761 Address(RHSElementPHI, 5762 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5763 5764 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5765 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5766 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5767 Address LHSElementCurrent = 5768 Address(LHSElementPHI, 5769 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5770 5771 // Emit copy. 5772 CodeGenFunction::OMPPrivateScope Scope(CGF); 5773 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5774 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5775 Scope.Privatize(); 5776 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5777 Scope.ForceCleanup(); 5778 5779 // Shift the address forward by one element. 5780 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5781 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5782 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5783 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5784 // Check whether we've reached the end. 5785 llvm::Value *Done = 5786 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5787 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5788 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5789 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5790 5791 // Done. 5792 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5793 } 5794 5795 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
///
/// The UDR form is recognized structurally: \p ReductionOp is a CallExpr whose
/// callee is an OpaqueValueExpr whose source expression (ignoring implicit
/// casts) is a DeclRefExpr naming an OMPDeclareReductionDecl. In that case the
/// opaque callee is bound to the first function returned by
/// getUserDefinedReduction() while the call is emitted.
/// \param ReductionOp Reduction expression to emit; its value is ignored.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          // The mapping stays active for the emission below and is undone when
          // Map goes out of scope.
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Not a UDR call: emit the expression as written.
  CGF.EmitIgnoredExpr(ReductionOp);
}

// Builds the internal-linkage helper
//   void reduction_func(void *LHSArg, void *RHSArg);
// Both arguments are cast to ArgsType and indexed as arrays of pointers. Each
// LHS/RHS variable is remapped to the address taken from its array slot, then
// the reduction operations are emitted in order. For a variably modified
// private type the following slot of the LHS array carries the array size.
// Returns the created function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // A fresh CodeGenFunction is used because the helper is a separate function.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // I counts reduction items; Idx counts array slots and runs ahead of I by
  // one for every variably modified item seen so far.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size is read from the next slot of the LHS array, where it is
      // stored as a pointer-sized value, and bound to the VLA size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the combiners; the three iterators advance together with the loop
  // over ReductionOps.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

// Emits the combiner for one reduction item. If PrivateRef has array type the
// combiner is emitted once per element through EmitOMPAggregateReduction(),
// with LHS/RHS naming the variables remapped to the current elements;
// otherwise the combiner expression is emitted once.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

// Emits the code that combines the reduction items. Privates, LHSExprs,
// RHSExprs and ReductionOps are parallel arrays with one entry per item.
// Options.SimpleReduction selects the inline-only form; Options.WithNowait
// selects the _nowait runtime entry points.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime calls: emit the combiners inline and return.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  // The list gets one extra slot per variably modified item to carry its size.
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // I counts reduction items; Idx counts slots of RedList.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is stored as a pointer-sized value in the next slot.
      // Note: this local `Size` shadows the slot count declared above.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // NOTE(review): <n> is RHSExprs.size(), i.e. the number of reduction items,
  // while sizeof(RedList) also covers the extra VLA-size slots.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // Case 1 body: plain (non-atomic) combiners for every reduction item.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No enter call (nullptr); the end-reduce function is supplied as the exit
  // call of the action.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  // Case 2 body: each item is combined either with a simple atomic update
  // (when the reduction op has the form `x = <update>`) or inside a critical
  // region otherwise.
  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr: assignment target; UpExpr: full update expression; EExpr: right
      // operand of the binary operator found in the update; BO: its opcode
      // (BO_Comma when none was found).
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: inside this `if` the name `BO` refers to the BinaryOperator node
      // and shadows the opcode variable declared just above.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        // Emits one atomic update of X. The callback passed to
        // EmitOMPAtomicSimpleUpdateExpr stores the supplied value of X into a
        // temporary, maps VD to that temporary and evaluates UpExpr.
        // Note: the local `RValue E` below shadows the loop variable `E`.
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        // The three Expr parameters exist only to match the generator
        // signature expected by EmitOMPAggregateReduction; they are unused.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    // Note: this EndArgs shadows the array of the same name declared for
    // case 1 and holds the same three values.
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end call is attached to the atomic case.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 6234 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 6235 const Expr *Ref) { 6236 SmallString<256> Buffer; 6237 llvm::raw_svector_ostream Out(Buffer); 6238 const clang::DeclRefExpr *DE; 6239 const VarDecl *D = ::getBaseDecl(Ref, DE); 6240 if (!D) 6241 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 6242 D = D->getCanonicalDecl(); 6243 std::string Name = CGM.getOpenMPRuntime().getName( 6244 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 6245 Out << Prefix << Name << "_" 6246 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 6247 return std::string(Out.str()); 6248 } 6249 6250 /// Emits reduction initializer function: 6251 /// \code 6252 /// void @.red_init(void* %arg) { 6253 /// %0 = bitcast void* %arg to <type>* 6254 /// store <type> <init>, <type>* %0 6255 /// ret void 6256 /// } 6257 /// \endcode 6258 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6259 SourceLocation Loc, 6260 ReductionCodeGen &RCG, unsigned N) { 6261 ASTContext &C = CGM.getContext(); 6262 FunctionArgList Args; 6263 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6264 ImplicitParamDecl::Other); 6265 Args.emplace_back(&Param); 6266 const auto &FnInfo = 6267 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6268 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6269 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6270 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6271 Name, &CGM.getModule()); 6272 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6273 Fn->setDoesNotRecurse(); 6274 CodeGenFunction CGF(CGM); 6275 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6276 Address PrivateAddr = CGF.EmitLoadOfPointer( 6277 CGF.GetAddrOfLocalVar(&Param), 6278 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6279 llvm::Value *Size = 
nullptr; 6280 // If the size of the reduction item is non-constant, load it from global 6281 // threadprivate variable. 6282 if (RCG.getSizes(N).second) { 6283 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6284 CGF, CGM.getContext().getSizeType(), 6285 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6286 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6287 CGM.getContext().getSizeType(), Loc); 6288 } 6289 RCG.emitAggregateType(CGF, N, Size); 6290 LValue SharedLVal; 6291 // If initializer uses initializer from declare reduction construct, emit a 6292 // pointer to the address of the original reduction item (reuired by reduction 6293 // initializer) 6294 if (RCG.usesReductionInitializer(N)) { 6295 Address SharedAddr = 6296 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6297 CGF, CGM.getContext().VoidPtrTy, 6298 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6299 SharedAddr = CGF.EmitLoadOfPointer( 6300 SharedAddr, 6301 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 6302 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 6303 } else { 6304 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 6305 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 6306 CGM.getContext().VoidPtrTy); 6307 } 6308 // Emit the initializer: 6309 // %0 = bitcast void* %arg to <type>* 6310 // store <type> <init>, <type>* %0 6311 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 6312 [](CodeGenFunction &) { return false; }); 6313 CGF.FinishFunction(); 6314 return Fn; 6315 } 6316 6317 /// Emits reduction combiner function: 6318 /// \code 6319 /// void @.red_comb(void* %arg0, void* %arg1) { 6320 /// %lhs = bitcast void* %arg0 to <type>* 6321 /// %rhs = bitcast void* %arg1 to <type>* 6322 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6323 /// store <type> %2, <type>* %lhs 6324 /// ret void 6325 /// } 6326 /// \endcode 6327 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 
6328 SourceLocation Loc, 6329 ReductionCodeGen &RCG, unsigned N, 6330 const Expr *ReductionOp, 6331 const Expr *LHS, const Expr *RHS, 6332 const Expr *PrivateRef) { 6333 ASTContext &C = CGM.getContext(); 6334 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6335 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6336 FunctionArgList Args; 6337 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6338 C.VoidPtrTy, ImplicitParamDecl::Other); 6339 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6340 ImplicitParamDecl::Other); 6341 Args.emplace_back(&ParamInOut); 6342 Args.emplace_back(&ParamIn); 6343 const auto &FnInfo = 6344 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6345 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6346 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6347 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6348 Name, &CGM.getModule()); 6349 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6350 Fn->setDoesNotRecurse(); 6351 CodeGenFunction CGF(CGM); 6352 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6353 llvm::Value *Size = nullptr; 6354 // If the size of the reduction item is non-constant, load it from global 6355 // threadprivate variable. 6356 if (RCG.getSizes(N).second) { 6357 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6358 CGF, CGM.getContext().getSizeType(), 6359 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6360 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6361 CGM.getContext().getSizeType(), Loc); 6362 } 6363 RCG.emitAggregateType(CGF, N, Size); 6364 // Remap lhs and rhs variables to the addresses of the function arguments. 
6365 // %lhs = bitcast void* %arg0 to <type>* 6366 // %rhs = bitcast void* %arg1 to <type>* 6367 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6368 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6369 // Pull out the pointer to the variable. 6370 Address PtrAddr = CGF.EmitLoadOfPointer( 6371 CGF.GetAddrOfLocalVar(&ParamInOut), 6372 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6373 return CGF.Builder.CreateElementBitCast( 6374 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6375 }); 6376 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6377 // Pull out the pointer to the variable. 6378 Address PtrAddr = CGF.EmitLoadOfPointer( 6379 CGF.GetAddrOfLocalVar(&ParamIn), 6380 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6381 return CGF.Builder.CreateElementBitCast( 6382 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6383 }); 6384 PrivateScope.Privatize(); 6385 // Emit the combiner body: 6386 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6387 // store <type> %2, <type>* %lhs 6388 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6389 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6390 cast<DeclRefExpr>(RHS)); 6391 CGF.FinishFunction(); 6392 return Fn; 6393 } 6394 6395 /// Emits reduction finalizer function: 6396 /// \code 6397 /// void @.red_fini(void* %arg) { 6398 /// %0 = bitcast void* %arg to <type>* 6399 /// <destroy>(<type>* %0) 6400 /// ret void 6401 /// } 6402 /// \endcode 6403 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6404 SourceLocation Loc, 6405 ReductionCodeGen &RCG, unsigned N) { 6406 if (!RCG.needCleanups(N)) 6407 return nullptr; 6408 ASTContext &C = CGM.getContext(); 6409 FunctionArgList Args; 6410 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6411 ImplicitParamDecl::Other); 6412 Args.emplace_back(&Param); 6413 const auto &FnInfo = 6414 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6415 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 6416 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6417 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6418 Name, &CGM.getModule()); 6419 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6420 Fn->setDoesNotRecurse(); 6421 CodeGenFunction CGF(CGM); 6422 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6423 Address PrivateAddr = CGF.EmitLoadOfPointer( 6424 CGF.GetAddrOfLocalVar(&Param), 6425 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6426 llvm::Value *Size = nullptr; 6427 // If the size of the reduction item is non-constant, load it from global 6428 // threadprivate variable. 6429 if (RCG.getSizes(N).second) { 6430 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6431 CGF, CGM.getContext().getSizeType(), 6432 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6433 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6434 CGM.getContext().getSizeType(), Loc); 6435 } 6436 RCG.emitAggregateType(CGF, N, Size); 6437 // Emit the finalizer body: 6438 // <destroy>(<type>* %0) 6439 RCG.emitCleanups(CGF, N, PrivateAddr); 6440 CGF.FinishFunction(Loc); 6441 return Fn; 6442 } 6443 6444 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6445 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6446 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6447 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6448 return nullptr; 6449 6450 // Build typedef struct: 6451 // kmp_task_red_input { 6452 // void *reduce_shar; // shared reduction item 6453 // size_t reduce_size; // size of data item 6454 // void *reduce_init; // data initialization routine 6455 // void *reduce_fini; // data finalization routine 6456 // void *reduce_comb; // data combiner routine 6457 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6458 // } kmp_task_red_input_t; 6459 
ASTContext &C = CGM.getContext(); 6460 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6461 RD->startDefinition(); 6462 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6463 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6464 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6465 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6466 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6467 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6468 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6469 RD->completeDefinition(); 6470 QualType RDType = C.getRecordType(RD); 6471 unsigned Size = Data.ReductionVars.size(); 6472 llvm::APInt ArraySize(/*numBits=*/64, Size); 6473 QualType ArrayRDType = C.getConstantArrayType( 6474 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6475 // kmp_task_red_input_t .rd_input.[Size]; 6476 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6477 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6478 Data.ReductionOps); 6479 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6480 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6481 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6482 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6483 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6484 TaskRedInput.getPointer(), Idxs, 6485 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6486 ".rd_input.gep."); 6487 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6488 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6489 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6490 RCG.emitSharedLValue(CGF, Cnt); 6491 llvm::Value *CastedShared = 6492 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6493 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6494 RCG.emitAggregateType(CGF, Cnt); 6495 llvm::Value *SizeValInChars; 6496 
llvm::Value *SizeVal; 6497 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6498 // We use delayed creation/initialization for VLAs, array sections and 6499 // custom reduction initializations. It is required because runtime does not 6500 // provide the way to pass the sizes of VLAs/array sections to 6501 // initializer/combiner/finalizer functions and does not pass the pointer to 6502 // original reduction item to the initializer. Instead threadprivate global 6503 // variables are used to store these values and use them in the functions. 6504 bool DelayedCreation = !!SizeVal; 6505 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6506 /*isSigned=*/false); 6507 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6508 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6509 // ElemLVal.reduce_init = init; 6510 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6511 llvm::Value *InitAddr = 6512 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6513 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6514 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 6515 // ElemLVal.reduce_fini = fini; 6516 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6517 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6518 llvm::Value *FiniAddr = Fini 6519 ? 
CGF.EmitCastToVoidPtr(Fini) 6520 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6521 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6522 // ElemLVal.reduce_comb = comb; 6523 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6524 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6525 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6526 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6527 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6528 // ElemLVal.flags = 0; 6529 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6530 if (DelayedCreation) { 6531 CGF.EmitStoreOfScalar( 6532 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6533 FlagsLVal); 6534 } else 6535 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6536 FlagsLVal.getType()); 6537 } 6538 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 6539 // *data); 6540 llvm::Value *Args[] = { 6541 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6542 /*isSigned=*/true), 6543 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6544 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6545 CGM.VoidPtrTy)}; 6546 return CGF.EmitRuntimeCall( 6547 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 6548 } 6549 6550 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6551 SourceLocation Loc, 6552 ReductionCodeGen &RCG, 6553 unsigned N) { 6554 auto Sizes = RCG.getSizes(N); 6555 // Emit threadprivate global variable if the type is non-constant 6556 // (Sizes.second = nullptr). 
6557 if (Sizes.second) { 6558 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6559 /*isSigned=*/false); 6560 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6561 CGF, CGM.getContext().getSizeType(), 6562 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6563 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6564 } 6565 // Store address of the original reduction item if custom initializer is used. 6566 if (RCG.usesReductionInitializer(N)) { 6567 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6568 CGF, CGM.getContext().VoidPtrTy, 6569 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6570 CGF.Builder.CreateStore( 6571 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6572 RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy), 6573 SharedAddr, /*IsVolatile=*/false); 6574 } 6575 } 6576 6577 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6578 SourceLocation Loc, 6579 llvm::Value *ReductionsPtr, 6580 LValue SharedLVal) { 6581 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6582 // *d); 6583 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6584 CGM.IntTy, 6585 /*isSigned=*/true), 6586 ReductionsPtr, 6587 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6588 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6589 return Address( 6590 CGF.EmitRuntimeCall( 6591 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6592 SharedLVal.getAlignment()); 6593 } 6594 6595 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6596 SourceLocation Loc) { 6597 if (!CGF.HaveInsertPoint()) 6598 return; 6599 6600 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 6601 if (OMPBuilder) { 6602 OMPBuilder->CreateTaskwait(CGF.Builder); 6603 } else { 6604 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6605 // global_tid); 6606 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), 
getThreadID(CGF, Loc)}; 6607 // Ignore return result until untied tasks are supported. 6608 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6609 } 6610 6611 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6612 Region->emitUntiedSwitch(CGF); 6613 } 6614 6615 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6616 OpenMPDirectiveKind InnerKind, 6617 const RegionCodeGenTy &CodeGen, 6618 bool HasCancel) { 6619 if (!CGF.HaveInsertPoint()) 6620 return; 6621 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6622 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6623 } 6624 6625 namespace { 6626 enum RTCancelKind { 6627 CancelNoreq = 0, 6628 CancelParallel = 1, 6629 CancelLoop = 2, 6630 CancelSections = 3, 6631 CancelTaskgroup = 4 6632 }; 6633 } // anonymous namespace 6634 6635 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6636 RTCancelKind CancelKind = CancelNoreq; 6637 if (CancelRegion == OMPD_parallel) 6638 CancelKind = CancelParallel; 6639 else if (CancelRegion == OMPD_for) 6640 CancelKind = CancelLoop; 6641 else if (CancelRegion == OMPD_sections) 6642 CancelKind = CancelSections; 6643 else { 6644 assert(CancelRegion == OMPD_taskgroup); 6645 CancelKind = CancelTaskgroup; 6646 } 6647 return CancelKind; 6648 } 6649 6650 void CGOpenMPRuntime::emitCancellationPointCall( 6651 CodeGenFunction &CGF, SourceLocation Loc, 6652 OpenMPDirectiveKind CancelRegion) { 6653 if (!CGF.HaveInsertPoint()) 6654 return; 6655 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6656 // global_tid, kmp_int32 cncl_kind); 6657 if (auto *OMPRegionInfo = 6658 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6659 // For 'cancellation point taskgroup', the task region info may not have a 6660 // cancel. This may instead happen in another adjacent task. 
6661 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6662 llvm::Value *Args[] = { 6663 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6664 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6665 // Ignore return result until untied tasks are supported. 6666 llvm::Value *Result = CGF.EmitRuntimeCall( 6667 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6668 // if (__kmpc_cancellationpoint()) { 6669 // exit from construct; 6670 // } 6671 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6672 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6673 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6674 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6675 CGF.EmitBlock(ExitBB); 6676 // exit from construct; 6677 CodeGenFunction::JumpDest CancelDest = 6678 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6679 CGF.EmitBranchThroughCleanup(CancelDest); 6680 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6681 } 6682 } 6683 } 6684 6685 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6686 const Expr *IfCond, 6687 OpenMPDirectiveKind CancelRegion) { 6688 if (!CGF.HaveInsertPoint()) 6689 return; 6690 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6691 // kmp_int32 cncl_kind); 6692 if (auto *OMPRegionInfo = 6693 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6694 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6695 PrePostActionTy &) { 6696 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6697 llvm::Value *Args[] = { 6698 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6699 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6700 // Ignore return result until untied tasks are supported. 
6701 llvm::Value *Result = CGF.EmitRuntimeCall( 6702 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6703 // if (__kmpc_cancel()) { 6704 // exit from construct; 6705 // } 6706 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6707 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6708 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6709 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6710 CGF.EmitBlock(ExitBB); 6711 // exit from construct; 6712 CodeGenFunction::JumpDest CancelDest = 6713 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6714 CGF.EmitBranchThroughCleanup(CancelDest); 6715 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6716 }; 6717 if (IfCond) { 6718 emitIfClause(CGF, IfCond, ThenGen, 6719 [](CodeGenFunction &, PrePostActionTy &) {}); 6720 } else { 6721 RegionCodeGenTy ThenRCG(ThenGen); 6722 ThenRCG(CGF); 6723 } 6724 } 6725 } 6726 6727 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6728 const OMPExecutableDirective &D, StringRef ParentName, 6729 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6730 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6731 assert(!ParentName.empty() && "Invalid target region parent name!"); 6732 HasEmittedTargetRegion = true; 6733 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6734 IsOffloadEntry, CodeGen); 6735 } 6736 6737 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6738 const OMPExecutableDirective &D, StringRef ParentName, 6739 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6740 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6741 // Create a unique name for the entry function using the source location 6742 // information of the current target region. 
The name will be something like: 6743 // 6744 // __omp_offloading_DD_FFFF_PP_lBB 6745 // 6746 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6747 // mangled name of the function that encloses the target region and BB is the 6748 // line number of the target region. 6749 6750 unsigned DeviceID; 6751 unsigned FileID; 6752 unsigned Line; 6753 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6754 Line); 6755 SmallString<64> EntryFnName; 6756 { 6757 llvm::raw_svector_ostream OS(EntryFnName); 6758 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6759 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6760 } 6761 6762 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6763 6764 CodeGenFunction CGF(CGM, true); 6765 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6766 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6767 6768 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6769 6770 // If this target outline function is not an offload entry, we don't need to 6771 // register it. 6772 if (!IsOffloadEntry) 6773 return; 6774 6775 // The target region ID is used by the runtime library to identify the current 6776 // target region, so it only has to be unique and not necessarily point to 6777 // anything. It could be the pointer to the outlined function that implements 6778 // the target region, but we aren't using that so that the compiler doesn't 6779 // need to keep that, and could therefore inline the host function if proven 6780 // worthwhile during optimization. In the other hand, if emitting code for the 6781 // device, the ID has to be the function address so that it can retrieved from 6782 // the offloading entry and launched by the runtime library. We also mark the 6783 // outlined function to have external linkage in case we are emitting code for 6784 // the device, because these functions will be entry points to the device. 
6785 6786 if (CGM.getLangOpts().OpenMPIsDevice) { 6787 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6788 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6789 OutlinedFn->setDSOLocal(false); 6790 } else { 6791 std::string Name = getName({EntryFnName, "region_id"}); 6792 OutlinedFnID = new llvm::GlobalVariable( 6793 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6794 llvm::GlobalValue::WeakAnyLinkage, 6795 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6796 } 6797 6798 // Register the information for the entry associated with this target region. 6799 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6800 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6801 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6802 } 6803 6804 /// Checks if the expression is constant or does not have non-trivial function 6805 /// calls. 6806 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6807 // We can skip constant expressions. 6808 // We can skip expressions with trivial calls or simple expressions. 6809 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6810 !E->hasNonTrivialCall(Ctx)) && 6811 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6812 } 6813 6814 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6815 const Stmt *Body) { 6816 const Stmt *Child = Body->IgnoreContainers(); 6817 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6818 Child = nullptr; 6819 for (const Stmt *S : C->body()) { 6820 if (const auto *E = dyn_cast<Expr>(S)) { 6821 if (isTrivial(Ctx, E)) 6822 continue; 6823 } 6824 // Some of the statements can be ignored. 6825 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6826 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6827 continue; 6828 // Analyze declarations. 
6829 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6830 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6831 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6832 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6833 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6834 isa<UsingDirectiveDecl>(D) || 6835 isa<OMPDeclareReductionDecl>(D) || 6836 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6837 return true; 6838 const auto *VD = dyn_cast<VarDecl>(D); 6839 if (!VD) 6840 return false; 6841 return VD->isConstexpr() || 6842 ((VD->getType().isTrivialType(Ctx) || 6843 VD->getType()->isReferenceType()) && 6844 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6845 })) 6846 continue; 6847 } 6848 // Found multiple children - cannot get the one child only. 6849 if (Child) 6850 return nullptr; 6851 Child = S; 6852 } 6853 if (Child) 6854 Child = Child->IgnoreContainers(); 6855 } 6856 return Child; 6857 } 6858 6859 /// Emit the number of teams for a target directive. Inspect the num_teams 6860 /// clause associated with a teams construct combined or closely nested 6861 /// with the target directive. 6862 /// 6863 /// Emit a team of size one for directives such as 'target parallel' that 6864 /// have no associated teams construct. 6865 /// 6866 /// Otherwise, return nullptr. 
6867 static llvm::Value * 6868 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6869 const OMPExecutableDirective &D) { 6870 assert(!CGF.getLangOpts().OpenMPIsDevice && 6871 "Clauses associated with the teams directive expected to be emitted " 6872 "only for the host!"); 6873 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6874 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6875 "Expected target-based executable directive."); 6876 CGBuilderTy &Bld = CGF.Builder; 6877 switch (DirectiveKind) { 6878 case OMPD_target: { 6879 const auto *CS = D.getInnermostCapturedStmt(); 6880 const auto *Body = 6881 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6882 const Stmt *ChildStmt = 6883 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6884 if (const auto *NestedDir = 6885 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6886 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6887 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6888 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6889 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6890 const Expr *NumTeams = 6891 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6892 llvm::Value *NumTeamsVal = 6893 CGF.EmitScalarExpr(NumTeams, 6894 /*IgnoreResultAssign*/ true); 6895 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6896 /*isSigned=*/true); 6897 } 6898 return Bld.getInt32(0); 6899 } 6900 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6901 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6902 return Bld.getInt32(1); 6903 return Bld.getInt32(0); 6904 } 6905 return nullptr; 6906 } 6907 case OMPD_target_teams: 6908 case OMPD_target_teams_distribute: 6909 case OMPD_target_teams_distribute_simd: 6910 case OMPD_target_teams_distribute_parallel_for: 6911 case OMPD_target_teams_distribute_parallel_for_simd: { 6912 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6913 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6914 const Expr *NumTeams = 6915 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6916 llvm::Value *NumTeamsVal = 6917 CGF.EmitScalarExpr(NumTeams, 6918 /*IgnoreResultAssign*/ true); 6919 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6920 /*isSigned=*/true); 6921 } 6922 return Bld.getInt32(0); 6923 } 6924 case OMPD_target_parallel: 6925 case OMPD_target_parallel_for: 6926 case OMPD_target_parallel_for_simd: 6927 case OMPD_target_simd: 6928 return Bld.getInt32(1); 6929 case OMPD_parallel: 6930 case OMPD_for: 6931 case OMPD_parallel_for: 6932 case OMPD_parallel_master: 6933 case OMPD_parallel_sections: 6934 case OMPD_for_simd: 6935 case OMPD_parallel_for_simd: 6936 case OMPD_cancel: 6937 case OMPD_cancellation_point: 6938 case OMPD_ordered: 6939 case OMPD_threadprivate: 6940 case OMPD_allocate: 6941 case OMPD_task: 6942 case OMPD_simd: 6943 case OMPD_sections: 6944 case OMPD_section: 6945 case OMPD_single: 6946 case OMPD_master: 6947 case OMPD_critical: 6948 case OMPD_taskyield: 6949 case OMPD_barrier: 6950 case OMPD_taskwait: 6951 case OMPD_taskgroup: 6952 case OMPD_atomic: 6953 case OMPD_flush: 6954 case OMPD_depobj: 6955 case OMPD_scan: 6956 case OMPD_teams: 6957 case OMPD_target_data: 6958 case OMPD_target_exit_data: 6959 case OMPD_target_enter_data: 6960 case OMPD_distribute: 6961 case OMPD_distribute_simd: 6962 case OMPD_distribute_parallel_for: 6963 case OMPD_distribute_parallel_for_simd: 6964 case OMPD_teams_distribute: 6965 case OMPD_teams_distribute_simd: 6966 case OMPD_teams_distribute_parallel_for: 6967 case OMPD_teams_distribute_parallel_for_simd: 6968 case OMPD_target_update: 6969 case OMPD_declare_simd: 6970 case OMPD_declare_variant: 6971 case OMPD_begin_declare_variant: 6972 case OMPD_end_declare_variant: 6973 case OMPD_declare_target: 6974 case OMPD_end_declare_target: 6975 case OMPD_declare_reduction: 6976 case OMPD_declare_mapper: 6977 case OMPD_taskloop: 6978 case OMPD_taskloop_simd: 6979 case OMPD_master_taskloop: 
6980 case OMPD_master_taskloop_simd: 6981 case OMPD_parallel_master_taskloop: 6982 case OMPD_parallel_master_taskloop_simd: 6983 case OMPD_requires: 6984 case OMPD_unknown: 6985 break; 6986 } 6987 llvm_unreachable("Unexpected directive kind."); 6988 } 6989 6990 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6991 llvm::Value *DefaultThreadLimitVal) { 6992 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6993 CGF.getContext(), CS->getCapturedStmt()); 6994 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6995 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6996 llvm::Value *NumThreads = nullptr; 6997 llvm::Value *CondVal = nullptr; 6998 // Handle if clause. If if clause present, the number of threads is 6999 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 7000 if (Dir->hasClausesOfKind<OMPIfClause>()) { 7001 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7002 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7003 const OMPIfClause *IfClause = nullptr; 7004 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 7005 if (C->getNameModifier() == OMPD_unknown || 7006 C->getNameModifier() == OMPD_parallel) { 7007 IfClause = C; 7008 break; 7009 } 7010 } 7011 if (IfClause) { 7012 const Expr *Cond = IfClause->getCondition(); 7013 bool Result; 7014 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7015 if (!Result) 7016 return CGF.Builder.getInt32(1); 7017 } else { 7018 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 7019 if (const auto *PreInit = 7020 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 7021 for (const auto *I : PreInit->decls()) { 7022 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7023 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7024 } else { 7025 CodeGenFunction::AutoVarEmission Emission = 7026 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7027 CGF.EmitAutoVarCleanups(Emission); 7028 } 7029 } 7030 } 7031 CondVal = 
CGF.EvaluateExprAsBool(Cond); 7032 } 7033 } 7034 } 7035 // Check the value of num_threads clause iff if clause was not specified 7036 // or is not evaluated to false. 7037 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 7038 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7039 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7040 const auto *NumThreadsClause = 7041 Dir->getSingleClause<OMPNumThreadsClause>(); 7042 CodeGenFunction::LexicalScope Scope( 7043 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 7044 if (const auto *PreInit = 7045 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 7046 for (const auto *I : PreInit->decls()) { 7047 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7048 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7049 } else { 7050 CodeGenFunction::AutoVarEmission Emission = 7051 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7052 CGF.EmitAutoVarCleanups(Emission); 7053 } 7054 } 7055 } 7056 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 7057 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 7058 /*isSigned=*/false); 7059 if (DefaultThreadLimitVal) 7060 NumThreads = CGF.Builder.CreateSelect( 7061 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 7062 DefaultThreadLimitVal, NumThreads); 7063 } else { 7064 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 7065 : CGF.Builder.getInt32(0); 7066 } 7067 // Process condition of the if clause. 7068 if (CondVal) { 7069 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 7070 CGF.Builder.getInt32(1)); 7071 } 7072 return NumThreads; 7073 } 7074 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 7075 return CGF.Builder.getInt32(1); 7076 return DefaultThreadLimitVal; 7077 } 7078 return DefaultThreadLimitVal ? DefaultThreadLimitVal 7079 : CGF.Builder.getInt32(0); 7080 } 7081 7082 /// Emit the number of threads for a target directive. 
Inspect the 7083 /// thread_limit clause associated with a teams construct combined or closely 7084 /// nested with the target directive. 7085 /// 7086 /// Emit the num_threads clause for directives such as 'target parallel' that 7087 /// have no associated teams construct. 7088 /// 7089 /// Otherwise, return nullptr. 7090 static llvm::Value * 7091 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 7092 const OMPExecutableDirective &D) { 7093 assert(!CGF.getLangOpts().OpenMPIsDevice && 7094 "Clauses associated with the teams directive expected to be emitted " 7095 "only for the host!"); 7096 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7097 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7098 "Expected target-based executable directive."); 7099 CGBuilderTy &Bld = CGF.Builder; 7100 llvm::Value *ThreadLimitVal = nullptr; 7101 llvm::Value *NumThreadsVal = nullptr; 7102 switch (DirectiveKind) { 7103 case OMPD_target: { 7104 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7105 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7106 return NumThreads; 7107 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7108 CGF.getContext(), CS->getCapturedStmt()); 7109 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7110 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7111 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7112 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7113 const auto *ThreadLimitClause = 7114 Dir->getSingleClause<OMPThreadLimitClause>(); 7115 CodeGenFunction::LexicalScope Scope( 7116 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7117 if (const auto *PreInit = 7118 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7119 for (const auto *I : PreInit->decls()) { 7120 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7121 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7122 } else { 7123 CodeGenFunction::AutoVarEmission Emission = 7124 
CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7125 CGF.EmitAutoVarCleanups(Emission); 7126 } 7127 } 7128 } 7129 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7130 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7131 ThreadLimitVal = 7132 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7133 } 7134 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7135 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7136 CS = Dir->getInnermostCapturedStmt(); 7137 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7138 CGF.getContext(), CS->getCapturedStmt()); 7139 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7140 } 7141 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7142 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7143 CS = Dir->getInnermostCapturedStmt(); 7144 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7145 return NumThreads; 7146 } 7147 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7148 return Bld.getInt32(1); 7149 } 7150 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 7151 } 7152 case OMPD_target_teams: { 7153 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7154 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7155 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7156 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7157 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7158 ThreadLimitVal = 7159 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7160 } 7161 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7162 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7163 return NumThreads; 7164 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7165 CGF.getContext(), CS->getCapturedStmt()); 7166 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7167 if (Dir->getDirectiveKind() == OMPD_distribute) { 7168 CS = Dir->getInnermostCapturedStmt(); 7169 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7170 return NumThreads; 7171 } 7172 } 7173 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7174 } 7175 case OMPD_target_teams_distribute: 7176 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7177 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7178 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7179 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7180 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7181 ThreadLimitVal = 7182 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7183 } 7184 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 7185 case OMPD_target_parallel: 7186 case OMPD_target_parallel_for: 7187 case OMPD_target_parallel_for_simd: 7188 case OMPD_target_teams_distribute_parallel_for: 7189 case OMPD_target_teams_distribute_parallel_for_simd: { 7190 llvm::Value *CondVal = nullptr; 7191 // Handle if clause. 
If if clause present, the number of threads is 7192 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 7193 if (D.hasClausesOfKind<OMPIfClause>()) { 7194 const OMPIfClause *IfClause = nullptr; 7195 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7196 if (C->getNameModifier() == OMPD_unknown || 7197 C->getNameModifier() == OMPD_parallel) { 7198 IfClause = C; 7199 break; 7200 } 7201 } 7202 if (IfClause) { 7203 const Expr *Cond = IfClause->getCondition(); 7204 bool Result; 7205 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7206 if (!Result) 7207 return Bld.getInt32(1); 7208 } else { 7209 CodeGenFunction::RunCleanupsScope Scope(CGF); 7210 CondVal = CGF.EvaluateExprAsBool(Cond); 7211 } 7212 } 7213 } 7214 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7215 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7216 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7217 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7218 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7219 ThreadLimitVal = 7220 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7221 } 7222 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7223 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7224 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7225 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7226 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7227 NumThreadsVal = 7228 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7229 ThreadLimitVal = ThreadLimitVal 7230 ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7231 ThreadLimitVal), 7232 NumThreadsVal, ThreadLimitVal) 7233 : NumThreadsVal; 7234 } 7235 if (!ThreadLimitVal) 7236 ThreadLimitVal = Bld.getInt32(0); 7237 if (CondVal) 7238 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7239 return ThreadLimitVal; 7240 } 7241 case OMPD_target_teams_distribute_simd: 7242 case OMPD_target_simd: 7243 return Bld.getInt32(1); 7244 case OMPD_parallel: 7245 case OMPD_for: 7246 case OMPD_parallel_for: 7247 case OMPD_parallel_master: 7248 case OMPD_parallel_sections: 7249 case OMPD_for_simd: 7250 case OMPD_parallel_for_simd: 7251 case OMPD_cancel: 7252 case OMPD_cancellation_point: 7253 case OMPD_ordered: 7254 case OMPD_threadprivate: 7255 case OMPD_allocate: 7256 case OMPD_task: 7257 case OMPD_simd: 7258 case OMPD_sections: 7259 case OMPD_section: 7260 case OMPD_single: 7261 case OMPD_master: 7262 case OMPD_critical: 7263 case OMPD_taskyield: 7264 case OMPD_barrier: 7265 case OMPD_taskwait: 7266 case OMPD_taskgroup: 7267 case OMPD_atomic: 7268 case OMPD_flush: 7269 case OMPD_depobj: 7270 case OMPD_scan: 7271 case OMPD_teams: 7272 case OMPD_target_data: 7273 case OMPD_target_exit_data: 7274 case OMPD_target_enter_data: 7275 case OMPD_distribute: 7276 case OMPD_distribute_simd: 7277 case OMPD_distribute_parallel_for: 7278 case OMPD_distribute_parallel_for_simd: 7279 case OMPD_teams_distribute: 7280 case OMPD_teams_distribute_simd: 7281 case OMPD_teams_distribute_parallel_for: 7282 case OMPD_teams_distribute_parallel_for_simd: 7283 case OMPD_target_update: 7284 case OMPD_declare_simd: 7285 case OMPD_declare_variant: 7286 case OMPD_begin_declare_variant: 7287 case OMPD_end_declare_variant: 7288 case OMPD_declare_target: 7289 case OMPD_end_declare_target: 7290 case OMPD_declare_reduction: 7291 case OMPD_declare_mapper: 7292 case OMPD_taskloop: 7293 case OMPD_taskloop_simd: 7294 case OMPD_master_taskloop: 7295 case OMPD_master_taskloop_simd: 7296 case 
OMPD_parallel_master_taskloop: 7297 case OMPD_parallel_master_taskloop_simd: 7298 case OMPD_requires: 7299 case OMPD_unknown: 7300 break; 7301 } 7302 llvm_unreachable("Unsupported directive kind."); 7303 } 7304 7305 namespace { 7306 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7307 7308 // Utility to handle information from clauses associated with a given 7309 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7310 // It provides a convenient interface to obtain the information and generate 7311 // code for that information. 7312 class MappableExprsHandler { 7313 public: 7314 /// Values for bit flags used to specify the mapping type for 7315 /// offloading. 7316 enum OpenMPOffloadMappingFlags : uint64_t { 7317 /// No flags 7318 OMP_MAP_NONE = 0x0, 7319 /// Allocate memory on the device and move data from host to device. 7320 OMP_MAP_TO = 0x01, 7321 /// Allocate memory on the device and move data from device to host. 7322 OMP_MAP_FROM = 0x02, 7323 /// Always perform the requested mapping action on the element, even 7324 /// if it was already mapped before. 7325 OMP_MAP_ALWAYS = 0x04, 7326 /// Delete the element from the device environment, ignoring the 7327 /// current reference count associated with the element. 7328 OMP_MAP_DELETE = 0x08, 7329 /// The element being mapped is a pointer-pointee pair; both the 7330 /// pointer and the pointee should be mapped. 7331 OMP_MAP_PTR_AND_OBJ = 0x10, 7332 /// This flags signals that the base address of an entry should be 7333 /// passed to the target kernel as an argument. 7334 OMP_MAP_TARGET_PARAM = 0x20, 7335 /// Signal that the runtime library has to return the device pointer 7336 /// in the current position for the data being mapped. Used when we have the 7337 /// use_device_ptr clause. 7338 OMP_MAP_RETURN_PARAM = 0x40, 7339 /// This flag signals that the reference being passed is a pointer to 7340 /// private data. 7341 OMP_MAP_PRIVATE = 0x80, 7342 /// Pass the element to the device by value. 
7343 OMP_MAP_LITERAL = 0x100, 7344 /// Implicit map 7345 OMP_MAP_IMPLICIT = 0x200, 7346 /// Close is a hint to the runtime to allocate memory close to 7347 /// the target device. 7348 OMP_MAP_CLOSE = 0x400, 7349 /// The 16 MSBs of the flags indicate whether the entry is member of some 7350 /// struct/class. 7351 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7352 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7353 }; 7354 7355 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7356 static unsigned getFlagMemberOffset() { 7357 unsigned Offset = 0; 7358 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7359 Remain = Remain >> 1) 7360 Offset++; 7361 return Offset; 7362 } 7363 7364 /// Class that associates information with a base pointer to be passed to the 7365 /// runtime library. 7366 class BasePointerInfo { 7367 /// The base pointer. 7368 llvm::Value *Ptr = nullptr; 7369 /// The base declaration that refers to this device pointer, or null if 7370 /// there is none. 7371 const ValueDecl *DevPtrDecl = nullptr; 7372 7373 public: 7374 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7375 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7376 llvm::Value *operator*() const { return Ptr; } 7377 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7378 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7379 }; 7380 7381 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7382 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7383 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7384 7385 /// Map between a struct and the its lowest & highest elements which have been 7386 /// mapped. 
/// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  /// All three addresses start out invalid and both field indices start at 0.
  struct StructRangeInfoTy {
    /// Lowest mapped element: (field index, address).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Highest mapped element: (field index, address).
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Base address of the struct the elements belong to.
    Address Base = Address::invalid();
  };

private:
  /// Bundle of the data kept for one mappable-expression component list: the
  /// components themselves, the map type and modifiers, whether a device
  /// pointer has to be returned for it, and whether the entry is implicit.
  struct MapInfo {
    /// Non-owning reference to the expression components.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Non-owning view of the modifiers; the referenced storage must outlive
    /// this object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// NOTE(review): presumably set for use_device_ptr entries - confirm
    /// against the users of this struct.
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred entry.
    const Expr *IE = nullptr;
    /// Declaration of the deferred entry.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
7434 CodeGenFunction &CGF; 7435 7436 /// Set of all first private variables in the current directive. 7437 /// bool data is set to true if the variable is implicitly marked as 7438 /// firstprivate, false otherwise. 7439 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7440 7441 /// Map between device pointer declarations and their expression components. 7442 /// The key value for declarations in 'this' is null. 7443 llvm::DenseMap< 7444 const ValueDecl *, 7445 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7446 DevPointersMap; 7447 7448 llvm::Value *getExprTypeSize(const Expr *E) const { 7449 QualType ExprTy = E->getType().getCanonicalType(); 7450 7451 // Calculate the size for array shaping expression. 7452 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) { 7453 llvm::Value *Size = 7454 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType()); 7455 for (const Expr *SE : OAE->getDimensions()) { 7456 llvm::Value *Sz = CGF.EmitScalarExpr(SE); 7457 Sz = CGF.EmitScalarConversion(Sz, SE->getType(), 7458 CGF.getContext().getSizeType(), 7459 SE->getExprLoc()); 7460 Size = CGF.Builder.CreateNUWMul(Size, Sz); 7461 } 7462 return Size; 7463 } 7464 7465 // Reference types are ignored for mapping purposes. 7466 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7467 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7468 7469 // Given that an array section is considered a built-in type, we need to 7470 // do the calculation based on the length of the section instead of relying 7471 // on CGF.getTypeSize(E->getType()). 7472 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7473 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7474 OAE->getBase()->IgnoreParenImpCasts()) 7475 .getCanonicalType(); 7476 7477 // If there is no length associated with the expression and lower bound is 7478 // not specified too, that means we are using the whole length of the 7479 // base. 
7480 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7481 !OAE->getLowerBound()) 7482 return CGF.getTypeSize(BaseTy); 7483 7484 llvm::Value *ElemSize; 7485 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7486 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7487 } else { 7488 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7489 assert(ATy && "Expecting array type if not a pointer type."); 7490 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7491 } 7492 7493 // If we don't have a length at this point, that is because we have an 7494 // array section with a single element. 7495 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7496 return ElemSize; 7497 7498 if (const Expr *LenExpr = OAE->getLength()) { 7499 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7500 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7501 CGF.getContext().getSizeType(), 7502 LenExpr->getExprLoc()); 7503 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7504 } 7505 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7506 OAE->getLowerBound() && "expected array_section[lb:]."); 7507 // Size = sizetype - lb * elemtype; 7508 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7509 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7510 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7511 CGF.getContext().getSizeType(), 7512 OAE->getLowerBound()->getExprLoc()); 7513 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7514 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7515 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7516 LengthVal = CGF.Builder.CreateSelect( 7517 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7518 return LengthVal; 7519 } 7520 return CGF.getTypeSize(ExprTy); 7521 } 7522 7523 /// Return the corresponding bits for a given map clause modifier. 
Add 7524 /// a flag marking the map as a pointer if requested. Add a flag marking the 7525 /// map as the first one of a series of maps that relate to the same map 7526 /// expression. 7527 OpenMPOffloadMappingFlags getMapTypeBits( 7528 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7529 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7530 OpenMPOffloadMappingFlags Bits = 7531 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7532 switch (MapType) { 7533 case OMPC_MAP_alloc: 7534 case OMPC_MAP_release: 7535 // alloc and release is the default behavior in the runtime library, i.e. 7536 // if we don't pass any bits alloc/release that is what the runtime is 7537 // going to do. Therefore, we don't need to signal anything for these two 7538 // type modifiers. 7539 break; 7540 case OMPC_MAP_to: 7541 Bits |= OMP_MAP_TO; 7542 break; 7543 case OMPC_MAP_from: 7544 Bits |= OMP_MAP_FROM; 7545 break; 7546 case OMPC_MAP_tofrom: 7547 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7548 break; 7549 case OMPC_MAP_delete: 7550 Bits |= OMP_MAP_DELETE; 7551 break; 7552 case OMPC_MAP_unknown: 7553 llvm_unreachable("Unexpected map type!"); 7554 } 7555 if (AddPtrFlag) 7556 Bits |= OMP_MAP_PTR_AND_OBJ; 7557 if (AddIsTargetParamFlag) 7558 Bits |= OMP_MAP_TARGET_PARAM; 7559 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7560 != MapModifiers.end()) 7561 Bits |= OMP_MAP_ALWAYS; 7562 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7563 != MapModifiers.end()) 7564 Bits |= OMP_MAP_CLOSE; 7565 return Bits; 7566 } 7567 7568 /// Return true if the provided expression is a final array section. A 7569 /// final array section, is one whose length can't be proved to be one. 7570 bool isFinalArraySectionExpression(const Expr *E) const { 7571 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7572 7573 // It is not an array section and therefore not a unity-size one. 
7574 if (!OASE) 7575 return false; 7576 7577 // An array section with no colon always refer to a single element. 7578 if (OASE->getColonLoc().isInvalid()) 7579 return false; 7580 7581 const Expr *Length = OASE->getLength(); 7582 7583 // If we don't have a length we have to check if the array has size 1 7584 // for this dimension. Also, we should always expect a length if the 7585 // base type is pointer. 7586 if (!Length) { 7587 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7588 OASE->getBase()->IgnoreParenImpCasts()) 7589 .getCanonicalType(); 7590 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7591 return ATy->getSize().getSExtValue() != 1; 7592 // If we don't have a constant dimension length, we have to consider 7593 // the current section as having any size, so it is not necessarily 7594 // unitary. If it happen to be unity size, that's user fault. 7595 return true; 7596 } 7597 7598 // Check if the length evaluates to 1. 7599 Expr::EvalResult Result; 7600 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7601 return true; // Can have more that size 1. 7602 7603 llvm::APSInt ConstLength = Result.Val.getInt(); 7604 return ConstLength.getSExtValue() != 1; 7605 } 7606 7607 /// Generate the base pointers, section pointers, sizes and map type 7608 /// bits for the provided map type, map modifier, and expression components. 7609 /// \a IsFirstComponent should be set to true if the provided set of 7610 /// components is the first associated with a capture. 
7611 void generateInfoForComponentList( 7612 OpenMPMapClauseKind MapType, 7613 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7614 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7615 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7616 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7617 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7618 bool IsImplicit, 7619 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7620 OverlappedElements = llvm::None) const { 7621 // The following summarizes what has to be generated for each map and the 7622 // types below. The generated information is expressed in this order: 7623 // base pointer, section pointer, size, flags 7624 // (to add to the ones that come from the map type and modifier). 7625 // 7626 // double d; 7627 // int i[100]; 7628 // float *p; 7629 // 7630 // struct S1 { 7631 // int i; 7632 // float f[50]; 7633 // } 7634 // struct S2 { 7635 // int i; 7636 // float f[50]; 7637 // S1 s; 7638 // double *p; 7639 // struct S2 *ps; 7640 // } 7641 // S2 s; 7642 // S2 *ps; 7643 // 7644 // map(d) 7645 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7646 // 7647 // map(i) 7648 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7649 // 7650 // map(i[1:23]) 7651 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7652 // 7653 // map(p) 7654 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7655 // 7656 // map(p[1:24]) 7657 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7658 // 7659 // map(s) 7660 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7661 // 7662 // map(s.i) 7663 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7664 // 7665 // map(s.s.f) 7666 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7667 // 7668 // map(s.p) 7669 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7670 // 7671 // map(to: s.p[:22]) 7672 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7673 // &s, &(s.p), sizeof(double*), 
MEMBER_OF(1) (**) 7674 // &(s.p), &(s.p[0]), 22*sizeof(double), 7675 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7676 // (*) alloc space for struct members, only this is a target parameter 7677 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7678 // optimizes this entry out, same in the examples below) 7679 // (***) map the pointee (map: to) 7680 // 7681 // map(s.ps) 7682 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7683 // 7684 // map(from: s.ps->s.i) 7685 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7686 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7687 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7688 // 7689 // map(to: s.ps->ps) 7690 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7691 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7692 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7693 // 7694 // map(s.ps->ps->ps) 7695 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7696 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7697 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7698 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7699 // 7700 // map(to: s.ps->ps->s.f[:22]) 7701 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7702 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7703 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7704 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7705 // 7706 // map(ps) 7707 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7708 // 7709 // map(ps->i) 7710 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7711 // 7712 // map(ps->s.f) 7713 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7714 // 7715 // map(from: ps->p) 7716 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7717 // 7718 // map(to: ps->p[:22]) 7719 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7720 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7721 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ 
| TO 7722 // 7723 // map(ps->ps) 7724 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7725 // 7726 // map(from: ps->ps->s.i) 7727 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7728 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7729 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7730 // 7731 // map(from: ps->ps->ps) 7732 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7733 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7734 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7735 // 7736 // map(ps->ps->ps->ps) 7737 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7738 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7739 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7740 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7741 // 7742 // map(to: ps->ps->ps->s.f[:22]) 7743 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7744 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7745 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7746 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7747 // 7748 // map(to: s.f[:22]) map(from: s.p[:33]) 7749 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7750 // sizeof(double*) (**), TARGET_PARAM 7751 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7752 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7753 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7754 // (*) allocate contiguous space needed to fit all mapped members even if 7755 // we allocate space for members not mapped (in this example, 7756 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7757 // them as well because they fall between &s.f[0] and &s.p) 7758 // 7759 // map(from: s.f[:22]) map(to: ps->p[:33]) 7760 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7761 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7762 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7763 // &(ps->p), 
&(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7764 // (*) the struct this entry pertains to is the 2nd element in the list of 7765 // arguments, hence MEMBER_OF(2) 7766 // 7767 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7768 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7769 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7770 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7771 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7772 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7773 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7774 // (*) the struct this entry pertains to is the 4th element in the list 7775 // of arguments, hence MEMBER_OF(4) 7776 7777 // Track if the map information being generated is the first for a capture. 7778 bool IsCaptureFirstInfo = IsFirstComponentList; 7779 // When the variable is on a declare target link or in a to clause with 7780 // unified memory, a reference is needed to hold the host/device address 7781 // of the variable. 7782 bool RequiresReference = false; 7783 7784 // Scan the components from the base to the complete expression. 7785 auto CI = Components.rbegin(); 7786 auto CE = Components.rend(); 7787 auto I = CI; 7788 7789 // Track if the map information being generated is the first for a list of 7790 // components. 7791 bool IsExpressionFirstInfo = true; 7792 Address BP = Address::invalid(); 7793 const Expr *AssocExpr = I->getAssociatedExpression(); 7794 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7795 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7796 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); 7797 7798 if (isa<MemberExpr>(AssocExpr)) { 7799 // The base is the 'this' pointer. The content of the pointer is going 7800 // to be the base of the field being mapped. 
7801 BP = CGF.LoadCXXThisAddress(); 7802 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7803 (OASE && 7804 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7805 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7806 } else if (OAShE && 7807 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { 7808 BP = Address( 7809 CGF.EmitScalarExpr(OAShE->getBase()), 7810 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); 7811 } else { 7812 // The base is the reference to the variable. 7813 // BP = &Var. 7814 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7815 if (const auto *VD = 7816 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7817 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7818 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7819 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7820 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7821 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7822 RequiresReference = true; 7823 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7824 } 7825 } 7826 } 7827 7828 // If the variable is a pointer and is being dereferenced (i.e. is not 7829 // the last component), the base has to be the pointer itself, not its 7830 // reference. References are ignored for mapping purposes. 7831 QualType Ty = 7832 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7833 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7834 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7835 7836 // We do not need to generate individual map information for the 7837 // pointer, it can be associated with the combined storage. 7838 ++I; 7839 } 7840 } 7841 7842 // Track whether a component of the list should be marked as MEMBER_OF some 7843 // combined entry (for partial structs). 
Only the first PTR_AND_OBJ entry 7844 // in a component list should be marked as MEMBER_OF, all subsequent entries 7845 // do not belong to the base struct. E.g. 7846 // struct S2 s; 7847 // s.ps->ps->ps->f[:] 7848 // (1) (2) (3) (4) 7849 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7850 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7851 // is the pointee of ps(2) which is not member of struct s, so it should not 7852 // be marked as such (it is still PTR_AND_OBJ). 7853 // The variable is initialized to false so that PTR_AND_OBJ entries which 7854 // are not struct members are not considered (e.g. array of pointers to 7855 // data). 7856 bool ShouldBeMemberOf = false; 7857 7858 // Variable keeping track of whether or not we have encountered a component 7859 // in the component list which is a member expression. Useful when we have a 7860 // pointer or a final array section, in which case it is the previous 7861 // component in the list which tells us whether we have a member expression. 7862 // E.g. X.f[:] 7863 // While processing the final array section "[:]" it is "f" which tells us 7864 // whether we are dealing with a member of a declared struct. 7865 const MemberExpr *EncounteredME = nullptr; 7866 7867 for (; I != CE; ++I) { 7868 // If the current component is member of a struct (parent struct) mark it. 7869 if (!EncounteredME) { 7870 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7871 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7872 // as MEMBER_OF the parent struct. 7873 if (EncounteredME) 7874 ShouldBeMemberOf = true; 7875 } 7876 7877 auto Next = std::next(I); 7878 7879 // We need to generate the addresses and sizes if this is the last 7880 // component, if the component is a pointer or if it is an array section 7881 // whose length can't be proved to be one. If this is a pointer, it 7882 // becomes the base address for the following components. 
7883 7884 // A final array section, is one whose length can't be proved to be one. 7885 bool IsFinalArraySection = 7886 isFinalArraySectionExpression(I->getAssociatedExpression()); 7887 7888 // Get information on whether the element is a pointer. Have to do a 7889 // special treatment for array sections given that they are built-in 7890 // types. 7891 const auto *OASE = 7892 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7893 const auto *OAShE = 7894 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); 7895 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7896 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7897 bool IsPointer = 7898 OAShE || 7899 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7900 .getCanonicalType() 7901 ->isAnyPointerType()) || 7902 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7903 bool IsNonDerefPointer = IsPointer && !UO && !BO; 7904 7905 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { 7906 // If this is not the last component, we expect the pointer to be 7907 // associated with an array expression or member expression. 
7908 assert((Next == CE || 7909 isa<MemberExpr>(Next->getAssociatedExpression()) || 7910 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7911 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7912 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7913 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7914 "Unexpected expression"); 7915 7916 Address LB = Address::invalid(); 7917 if (OAShE) { 7918 LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), 7919 CGF.getContext().getTypeAlignInChars( 7920 OAShE->getBase()->getType())); 7921 } else { 7922 LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7923 .getAddress(CGF); 7924 } 7925 7926 // If this component is a pointer inside the base struct then we don't 7927 // need to create any entry for it - it will be combined with the object 7928 // it is pointing to into a single PTR_AND_OBJ entry. 7929 bool IsMemberPointer = 7930 IsPointer && EncounteredME && 7931 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7932 EncounteredME); 7933 if (!OverlappedElements.empty()) { 7934 // Handle base element with the info for overlapped elements. 7935 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7936 assert(Next == CE && 7937 "Expected last element for the overlapped elements."); 7938 assert(!IsPointer && 7939 "Unexpected base element with the pointer type."); 7940 // Mark the whole struct as the struct that requires allocation on the 7941 // device. 7942 PartialStruct.LowestElem = {0, LB}; 7943 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7944 I->getAssociatedExpression()->getType()); 7945 Address HB = CGF.Builder.CreateConstGEP( 7946 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7947 CGF.VoidPtrTy), 7948 TypeSize.getQuantity() - 1); 7949 PartialStruct.HighestElem = { 7950 std::numeric_limits<decltype( 7951 PartialStruct.HighestElem.first)>::max(), 7952 HB}; 7953 PartialStruct.Base = BP; 7954 // Emit data for non-overlapped data. 
7955 OpenMPOffloadMappingFlags Flags = 7956 OMP_MAP_MEMBER_OF | 7957 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7958 /*AddPtrFlag=*/false, 7959 /*AddIsTargetParamFlag=*/false); 7960 LB = BP; 7961 llvm::Value *Size = nullptr; 7962 // Do bitcopy of all non-overlapped structure elements. 7963 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7964 Component : OverlappedElements) { 7965 Address ComponentLB = Address::invalid(); 7966 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7967 Component) { 7968 if (MC.getAssociatedDeclaration()) { 7969 ComponentLB = 7970 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7971 .getAddress(CGF); 7972 Size = CGF.Builder.CreatePtrDiff( 7973 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7974 CGF.EmitCastToVoidPtr(LB.getPointer())); 7975 break; 7976 } 7977 } 7978 BasePointers.push_back(BP.getPointer()); 7979 Pointers.push_back(LB.getPointer()); 7980 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7981 /*isSigned=*/true)); 7982 Types.push_back(Flags); 7983 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7984 } 7985 BasePointers.push_back(BP.getPointer()); 7986 Pointers.push_back(LB.getPointer()); 7987 Size = CGF.Builder.CreatePtrDiff( 7988 CGF.EmitCastToVoidPtr( 7989 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7990 CGF.EmitCastToVoidPtr(LB.getPointer())); 7991 Sizes.push_back( 7992 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7993 Types.push_back(Flags); 7994 break; 7995 } 7996 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7997 if (!IsMemberPointer) { 7998 BasePointers.push_back(BP.getPointer()); 7999 Pointers.push_back(LB.getPointer()); 8000 Sizes.push_back( 8001 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 8002 8003 // We need to add a pointer flag for each map that comes from the 8004 // same expression except for the first one. 
We also need to signal 8005 // this map is the first one that relates with the current capture 8006 // (there is a set of entries for each capture). 8007 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 8008 MapType, MapModifiers, IsImplicit, 8009 !IsExpressionFirstInfo || RequiresReference, 8010 IsCaptureFirstInfo && !RequiresReference); 8011 8012 if (!IsExpressionFirstInfo) { 8013 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 8014 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 8015 if (IsPointer) 8016 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 8017 OMP_MAP_DELETE | OMP_MAP_CLOSE); 8018 8019 if (ShouldBeMemberOf) { 8020 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 8021 // should be later updated with the correct value of MEMBER_OF. 8022 Flags |= OMP_MAP_MEMBER_OF; 8023 // From now on, all subsequent PTR_AND_OBJ entries should not be 8024 // marked as MEMBER_OF. 8025 ShouldBeMemberOf = false; 8026 } 8027 } 8028 8029 Types.push_back(Flags); 8030 } 8031 8032 // If we have encountered a member expression so far, keep track of the 8033 // mapped member. If the parent is "*this", then the value declaration 8034 // is nullptr. 8035 if (EncounteredME) { 8036 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 8037 unsigned FieldIndex = FD->getFieldIndex(); 8038 8039 // Update info about the lowest and highest elements for this struct 8040 if (!PartialStruct.Base.isValid()) { 8041 PartialStruct.LowestElem = {FieldIndex, LB}; 8042 PartialStruct.HighestElem = {FieldIndex, LB}; 8043 PartialStruct.Base = BP; 8044 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8045 PartialStruct.LowestElem = {FieldIndex, LB}; 8046 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8047 PartialStruct.HighestElem = {FieldIndex, LB}; 8048 } 8049 } 8050 8051 // If we have a final array section, we are done with this expression. 
8052 if (IsFinalArraySection) 8053 break; 8054 8055 // The pointer becomes the base for the next element. 8056 if (Next != CE) 8057 BP = LB; 8058 8059 IsExpressionFirstInfo = false; 8060 IsCaptureFirstInfo = false; 8061 } 8062 } 8063 } 8064 8065 /// Return the adjusted map modifiers if the declaration a capture refers to 8066 /// appears in a first-private clause. This is expected to be used only with 8067 /// directives that start with 'target'. 8068 MappableExprsHandler::OpenMPOffloadMappingFlags 8069 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8070 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8071 8072 // A first private variable captured by reference will use only the 8073 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8074 // declaration is known as first-private in this handler. 8075 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8076 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 8077 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 8078 return MappableExprsHandler::OMP_MAP_ALWAYS | 8079 MappableExprsHandler::OMP_MAP_TO; 8080 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8081 return MappableExprsHandler::OMP_MAP_TO | 8082 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8083 return MappableExprsHandler::OMP_MAP_PRIVATE | 8084 MappableExprsHandler::OMP_MAP_TO; 8085 } 8086 return MappableExprsHandler::OMP_MAP_TO | 8087 MappableExprsHandler::OMP_MAP_FROM; 8088 } 8089 8090 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8091 // Rotate by getFlagMemberOffset() bits. 
8092 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8093 << getFlagMemberOffset()); 8094 } 8095 8096 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8097 OpenMPOffloadMappingFlags MemberOfFlag) { 8098 // If the entry is PTR_AND_OBJ but has not been marked with the special 8099 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8100 // marked as MEMBER_OF. 8101 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8102 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8103 return; 8104 8105 // Reset the placeholder value to prepare the flag for the assignment of the 8106 // proper MEMBER_OF value. 8107 Flags &= ~OMP_MAP_MEMBER_OF; 8108 Flags |= MemberOfFlag; 8109 } 8110 8111 void getPlainLayout(const CXXRecordDecl *RD, 8112 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8113 bool AsBase) const { 8114 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8115 8116 llvm::StructType *St = 8117 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8118 8119 unsigned NumElements = St->getNumElements(); 8120 llvm::SmallVector< 8121 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8122 RecordLayout(NumElements); 8123 8124 // Fill bases. 8125 for (const auto &I : RD->bases()) { 8126 if (I.isVirtual()) 8127 continue; 8128 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8129 // Ignore empty bases. 8130 if (Base->isEmpty() || CGF.getContext() 8131 .getASTRecordLayout(Base) 8132 .getNonVirtualSize() 8133 .isZero()) 8134 continue; 8135 8136 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8137 RecordLayout[FieldIndex] = Base; 8138 } 8139 // Fill in virtual bases. 8140 for (const auto &I : RD->vbases()) { 8141 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8142 // Ignore empty bases. 
8143 if (Base->isEmpty()) 8144 continue; 8145 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8146 if (RecordLayout[FieldIndex]) 8147 continue; 8148 RecordLayout[FieldIndex] = Base; 8149 } 8150 // Fill in all the fields. 8151 assert(!RD->isUnion() && "Unexpected union."); 8152 for (const auto *Field : RD->fields()) { 8153 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8154 // will fill in later.) 8155 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8156 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8157 RecordLayout[FieldIndex] = Field; 8158 } 8159 } 8160 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8161 &Data : RecordLayout) { 8162 if (Data.isNull()) 8163 continue; 8164 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8165 getPlainLayout(Base, Layout, /*AsBase=*/true); 8166 else 8167 Layout.push_back(Data.get<const FieldDecl *>()); 8168 } 8169 } 8170 8171 public: 8172 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8173 : CurDir(&Dir), CGF(CGF) { 8174 // Extract firstprivate clause information. 8175 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8176 for (const auto *D : C->varlists()) 8177 FirstPrivateDecls.try_emplace( 8178 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8179 // Extract device pointer clause information. 8180 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8181 for (auto L : C->component_lists()) 8182 DevPointersMap[L.first].push_back(L.second); 8183 } 8184 8185 /// Constructor for the declare mapper directive. 8186 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8187 : CurDir(&Dir), CGF(CGF) {} 8188 8189 /// Generate code for the combined entry if we have a partially mapped struct 8190 /// and take care of the mapping flags of the arguments corresponding to 8191 /// individual struct members. 
8192 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 8193 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8194 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 8195 const StructRangeInfoTy &PartialStruct) const { 8196 // Base is the base of the struct 8197 BasePointers.push_back(PartialStruct.Base.getPointer()); 8198 // Pointer is the address of the lowest element 8199 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 8200 Pointers.push_back(LB); 8201 // Size is (addr of {highest+1} element) - (addr of lowest element) 8202 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 8203 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8204 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8205 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8206 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8207 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8208 /*isSigned=*/false); 8209 Sizes.push_back(Size); 8210 // Map type is always TARGET_PARAM 8211 Types.push_back(OMP_MAP_TARGET_PARAM); 8212 // Remove TARGET_PARAM flag from the first element 8213 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8214 8215 // All other current entries will be MEMBER_OF the combined entry 8216 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8217 // 0xFFFF in the MEMBER_OF field). 8218 OpenMPOffloadMappingFlags MemberOfFlag = 8219 getMemberOfFlag(BasePointers.size() - 1); 8220 for (auto &M : CurTypes) 8221 setCorrectMemberOfFlag(M, MemberOfFlag); 8222 } 8223 8224 /// Generate all the base pointers, section pointers, sizes and map 8225 /// types for the extracted mappable expressions. Also, for each item that 8226 /// relates with a device pointer, a pair of the relevant declaration and 8227 /// index where it occurs is appended to the device pointers info array. 
8228 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 8229 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8230 MapFlagsArrayTy &Types) const { 8231 // We have to process the component lists that relate with the same 8232 // declaration in a single chunk so that we can generate the map flags 8233 // correctly. Therefore, we organize all lists in a map. 8234 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8235 8236 // Helper function to fill the information map for the different supported 8237 // clauses. 8238 auto &&InfoGen = [&Info]( 8239 const ValueDecl *D, 8240 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8241 OpenMPMapClauseKind MapType, 8242 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8243 bool ReturnDevicePointer, bool IsImplicit) { 8244 const ValueDecl *VD = 8245 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8246 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8247 IsImplicit); 8248 }; 8249 8250 assert(CurDir.is<const OMPExecutableDirective *>() && 8251 "Expect a executable directive"); 8252 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8253 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 8254 for (const auto L : C->component_lists()) { 8255 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 8256 /*ReturnDevicePointer=*/false, C->isImplicit()); 8257 } 8258 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 8259 for (const auto L : C->component_lists()) { 8260 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 8261 /*ReturnDevicePointer=*/false, C->isImplicit()); 8262 } 8263 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 8264 for (const auto L : C->component_lists()) { 8265 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 8266 /*ReturnDevicePointer=*/false, C->isImplicit()); 8267 } 8268 8269 // Look at the use_device_ptr clause information and mark the existing map 8270 // 
entries as such. If there is no map information for an entry in the 8271 // use_device_ptr list, we create one with map type 'alloc' and zero size 8272 // section. It is the user fault if that was not mapped before. If there is 8273 // no map information and the pointer is a struct member, then we defer the 8274 // emission of that entry until the whole struct has been processed. 8275 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 8276 DeferredInfo; 8277 8278 for (const auto *C : 8279 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 8280 for (const auto L : C->component_lists()) { 8281 assert(!L.second.empty() && "Not expecting empty list of components!"); 8282 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 8283 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8284 const Expr *IE = L.second.back().getAssociatedExpression(); 8285 // If the first component is a member expression, we have to look into 8286 // 'this', which maps to null in the map of map information. Otherwise 8287 // look directly for the information. 8288 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8289 8290 // We potentially have map information for this declaration already. 8291 // Look for the first set of components that refer to it. 8292 if (It != Info.end()) { 8293 auto CI = std::find_if( 8294 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 8295 return MI.Components.back().getAssociatedDeclaration() == VD; 8296 }); 8297 // If we found a map entry, signal that the pointer has to be returned 8298 // and move on to the next declaration. 8299 if (CI != It->second.end()) { 8300 CI->ReturnDevicePointer = true; 8301 continue; 8302 } 8303 } 8304 8305 // We didn't find any match in our map information - generate a zero 8306 // size array section - if the pointer is a struct member we defer this 8307 // action until the whole struct has been processed. 
8308 if (isa<MemberExpr>(IE)) { 8309 // Insert the pointer into Info to be processed by 8310 // generateInfoForComponentList. Because it is a member pointer 8311 // without a pointee, no entry will be generated for it, therefore 8312 // we need to generate one after the whole struct has been processed. 8313 // Nonetheless, generateInfoForComponentList must be called to take 8314 // the pointer into account for the calculation of the range of the 8315 // partial struct. 8316 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8317 /*ReturnDevicePointer=*/false, C->isImplicit()); 8318 DeferredInfo[nullptr].emplace_back(IE, VD); 8319 } else { 8320 llvm::Value *Ptr = 8321 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8322 BasePointers.emplace_back(Ptr, VD); 8323 Pointers.push_back(Ptr); 8324 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8325 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8326 } 8327 } 8328 } 8329 8330 for (const auto &M : Info) { 8331 // We need to know when we generate information for the first component 8332 // associated with a capture, because the mapping flags depend on it. 8333 bool IsFirstComponentList = true; 8334 8335 // Temporary versions of arrays 8336 MapBaseValuesArrayTy CurBasePointers; 8337 MapValuesArrayTy CurPointers; 8338 MapValuesArrayTy CurSizes; 8339 MapFlagsArrayTy CurTypes; 8340 StructRangeInfoTy PartialStruct; 8341 8342 for (const MapInfo &L : M.second) { 8343 assert(!L.Components.empty() && 8344 "Not expecting declaration with no component lists."); 8345 8346 // Remember the current base pointer index. 8347 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8348 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8349 CurBasePointers, CurPointers, CurSizes, 8350 CurTypes, PartialStruct, 8351 IsFirstComponentList, L.IsImplicit); 8352 8353 // If this entry relates with a device pointer, set the relevant 8354 // declaration and add the 'return pointer' flag. 
8355 if (L.ReturnDevicePointer) { 8356 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8357 "Unexpected number of mapped base pointers."); 8358 8359 const ValueDecl *RelevantVD = 8360 L.Components.back().getAssociatedDeclaration(); 8361 assert(RelevantVD && 8362 "No relevant declaration related with device pointer??"); 8363 8364 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8365 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8366 } 8367 IsFirstComponentList = false; 8368 } 8369 8370 // Append any pending zero-length pointers which are struct members and 8371 // used with use_device_ptr. 8372 auto CI = DeferredInfo.find(M.first); 8373 if (CI != DeferredInfo.end()) { 8374 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8375 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8376 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8377 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8378 CurBasePointers.emplace_back(BasePtr, L.VD); 8379 CurPointers.push_back(Ptr); 8380 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8381 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8382 // value MEMBER_OF=FFFF so that the entry is later updated with the 8383 // correct value of MEMBER_OF. 8384 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8385 OMP_MAP_MEMBER_OF); 8386 } 8387 } 8388 8389 // If there is an entry in PartialStruct it means we have a struct with 8390 // individual members mapped. Emit an extra combined entry. 8391 if (PartialStruct.Base.isValid()) 8392 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8393 PartialStruct); 8394 8395 // We need to append the results of this capture to what we already have. 
8396 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8397 Pointers.append(CurPointers.begin(), CurPointers.end()); 8398 Sizes.append(CurSizes.begin(), CurSizes.end()); 8399 Types.append(CurTypes.begin(), CurTypes.end()); 8400 } 8401 } 8402 8403 /// Generate all the base pointers, section pointers, sizes and map types for 8404 /// the extracted map clauses of user-defined mapper. 8405 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8406 MapValuesArrayTy &Pointers, 8407 MapValuesArrayTy &Sizes, 8408 MapFlagsArrayTy &Types) const { 8409 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8410 "Expect a declare mapper directive"); 8411 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8412 // We have to process the component lists that relate with the same 8413 // declaration in a single chunk so that we can generate the map flags 8414 // correctly. Therefore, we organize all lists in a map. 8415 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8416 8417 // Helper function to fill the information map for the different supported 8418 // clauses. 8419 auto &&InfoGen = [&Info]( 8420 const ValueDecl *D, 8421 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8422 OpenMPMapClauseKind MapType, 8423 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8424 bool ReturnDevicePointer, bool IsImplicit) { 8425 const ValueDecl *VD = 8426 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8427 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8428 IsImplicit); 8429 }; 8430 8431 for (const auto *C : CurMapperDir->clauselists()) { 8432 const auto *MC = cast<OMPMapClause>(C); 8433 for (const auto L : MC->component_lists()) { 8434 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8435 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8436 } 8437 } 8438 8439 for (const auto &M : Info) { 8440 // We need to know when we generate information for the first component 8441 // associated with a capture, because the mapping flags depend on it. 8442 bool IsFirstComponentList = true; 8443 8444 // Temporary versions of arrays 8445 MapBaseValuesArrayTy CurBasePointers; 8446 MapValuesArrayTy CurPointers; 8447 MapValuesArrayTy CurSizes; 8448 MapFlagsArrayTy CurTypes; 8449 StructRangeInfoTy PartialStruct; 8450 8451 for (const MapInfo &L : M.second) { 8452 assert(!L.Components.empty() && 8453 "Not expecting declaration with no component lists."); 8454 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8455 CurBasePointers, CurPointers, CurSizes, 8456 CurTypes, PartialStruct, 8457 IsFirstComponentList, L.IsImplicit); 8458 IsFirstComponentList = false; 8459 } 8460 8461 // If there is an entry in PartialStruct it means we have a struct with 8462 // individual members mapped. Emit an extra combined entry. 8463 if (PartialStruct.Base.isValid()) 8464 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8465 PartialStruct); 8466 8467 // We need to append the results of this capture to what we already have. 8468 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8469 Pointers.append(CurPointers.begin(), CurPointers.end()); 8470 Sizes.append(CurSizes.begin(), CurSizes.end()); 8471 Types.append(CurTypes.begin(), CurTypes.end()); 8472 } 8473 } 8474 8475 /// Emit capture info for lambdas for variables captured by reference. 
8476 void generateInfoForLambdaCaptures( 8477 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8478 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8479 MapFlagsArrayTy &Types, 8480 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8481 const auto *RD = VD->getType() 8482 .getCanonicalType() 8483 .getNonReferenceType() 8484 ->getAsCXXRecordDecl(); 8485 if (!RD || !RD->isLambda()) 8486 return; 8487 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8488 LValue VDLVal = CGF.MakeAddrLValue( 8489 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8490 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8491 FieldDecl *ThisCapture = nullptr; 8492 RD->getCaptureFields(Captures, ThisCapture); 8493 if (ThisCapture) { 8494 LValue ThisLVal = 8495 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8496 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8497 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8498 VDLVal.getPointer(CGF)); 8499 BasePointers.push_back(ThisLVal.getPointer(CGF)); 8500 Pointers.push_back(ThisLValVal.getPointer(CGF)); 8501 Sizes.push_back( 8502 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8503 CGF.Int64Ty, /*isSigned=*/true)); 8504 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8505 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8506 } 8507 for (const LambdaCapture &LC : RD->captures()) { 8508 if (!LC.capturesVariable()) 8509 continue; 8510 const VarDecl *VD = LC.getCapturedVar(); 8511 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8512 continue; 8513 auto It = Captures.find(VD); 8514 assert(It != Captures.end() && "Found lambda capture without field."); 8515 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8516 if (LC.getCaptureKind() == LCK_ByRef) { 8517 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8518 
LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8519 VDLVal.getPointer(CGF)); 8520 BasePointers.push_back(VarLVal.getPointer(CGF)); 8521 Pointers.push_back(VarLValVal.getPointer(CGF)); 8522 Sizes.push_back(CGF.Builder.CreateIntCast( 8523 CGF.getTypeSize( 8524 VD->getType().getCanonicalType().getNonReferenceType()), 8525 CGF.Int64Ty, /*isSigned=*/true)); 8526 } else { 8527 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8528 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8529 VDLVal.getPointer(CGF)); 8530 BasePointers.push_back(VarLVal.getPointer(CGF)); 8531 Pointers.push_back(VarRVal.getScalarVal()); 8532 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8533 } 8534 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8535 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8536 } 8537 } 8538 8539 /// Set correct indices for lambdas captures. 8540 void adjustMemberOfForLambdaCaptures( 8541 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8542 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8543 MapFlagsArrayTy &Types) const { 8544 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8545 // Set correct member_of idx for all implicit lambda captures. 8546 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8547 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8548 continue; 8549 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8550 assert(BasePtr && "Unable to find base lambda address."); 8551 int TgtIdx = -1; 8552 for (unsigned J = I; J > 0; --J) { 8553 unsigned Idx = J - 1; 8554 if (Pointers[Idx] != BasePtr) 8555 continue; 8556 TgtIdx = Idx; 8557 break; 8558 } 8559 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8560 // All other current entries will be MEMBER_OF the combined entry 8561 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8562 // 0xFFFF in the MEMBER_OF field). 
8563 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8564 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8565 } 8566 } 8567 8568 /// Generate the base pointers, section pointers, sizes and map types 8569 /// associated to a given capture. 8570 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8571 llvm::Value *Arg, 8572 MapBaseValuesArrayTy &BasePointers, 8573 MapValuesArrayTy &Pointers, 8574 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8575 StructRangeInfoTy &PartialStruct) const { 8576 assert(!Cap->capturesVariableArrayType() && 8577 "Not expecting to generate map info for a variable array type!"); 8578 8579 // We need to know when we generating information for the first component 8580 const ValueDecl *VD = Cap->capturesThis() 8581 ? nullptr 8582 : Cap->getCapturedVar()->getCanonicalDecl(); 8583 8584 // If this declaration appears in a is_device_ptr clause we just have to 8585 // pass the pointer by value. If it is a reference to a declaration, we just 8586 // pass its value. 
8587 if (DevPointersMap.count(VD)) { 8588 BasePointers.emplace_back(Arg, VD); 8589 Pointers.push_back(Arg); 8590 Sizes.push_back( 8591 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8592 CGF.Int64Ty, /*isSigned=*/true)); 8593 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8594 return; 8595 } 8596 8597 using MapData = 8598 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8599 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8600 SmallVector<MapData, 4> DeclComponentLists; 8601 assert(CurDir.is<const OMPExecutableDirective *>() && 8602 "Expect a executable directive"); 8603 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8604 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8605 for (const auto L : C->decl_component_lists(VD)) { 8606 assert(L.first == VD && 8607 "We got information for the wrong declaration??"); 8608 assert(!L.second.empty() && 8609 "Not expecting declaration with no component lists."); 8610 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8611 C->getMapTypeModifiers(), 8612 C->isImplicit()); 8613 } 8614 } 8615 8616 // Find overlapping elements (including the offset from the base element). 
8617 llvm::SmallDenseMap< 8618 const MapData *, 8619 llvm::SmallVector< 8620 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8621 4> 8622 OverlappedData; 8623 size_t Count = 0; 8624 for (const MapData &L : DeclComponentLists) { 8625 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8626 OpenMPMapClauseKind MapType; 8627 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8628 bool IsImplicit; 8629 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8630 ++Count; 8631 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8632 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8633 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8634 auto CI = Components.rbegin(); 8635 auto CE = Components.rend(); 8636 auto SI = Components1.rbegin(); 8637 auto SE = Components1.rend(); 8638 for (; CI != CE && SI != SE; ++CI, ++SI) { 8639 if (CI->getAssociatedExpression()->getStmtClass() != 8640 SI->getAssociatedExpression()->getStmtClass()) 8641 break; 8642 // Are we dealing with different variables/fields? 8643 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8644 break; 8645 } 8646 // Found overlapping if, at least for one component, reached the head of 8647 // the components list. 8648 if (CI == CE || SI == SE) { 8649 assert((CI != CE || SI != SE) && 8650 "Unexpected full match of the mapping components."); 8651 const MapData &BaseData = CI == CE ? L : L1; 8652 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8653 SI == SE ? Components : Components1; 8654 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8655 OverlappedElements.getSecond().push_back(SubData); 8656 } 8657 } 8658 } 8659 // Sort the overlapped elements for each item. 
8660 llvm::SmallVector<const FieldDecl *, 4> Layout; 8661 if (!OverlappedData.empty()) { 8662 if (const auto *CRD = 8663 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8664 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8665 else { 8666 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8667 Layout.append(RD->field_begin(), RD->field_end()); 8668 } 8669 } 8670 for (auto &Pair : OverlappedData) { 8671 llvm::sort( 8672 Pair.getSecond(), 8673 [&Layout]( 8674 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8675 OMPClauseMappableExprCommon::MappableExprComponentListRef 8676 Second) { 8677 auto CI = First.rbegin(); 8678 auto CE = First.rend(); 8679 auto SI = Second.rbegin(); 8680 auto SE = Second.rend(); 8681 for (; CI != CE && SI != SE; ++CI, ++SI) { 8682 if (CI->getAssociatedExpression()->getStmtClass() != 8683 SI->getAssociatedExpression()->getStmtClass()) 8684 break; 8685 // Are we dealing with different variables/fields? 8686 if (CI->getAssociatedDeclaration() != 8687 SI->getAssociatedDeclaration()) 8688 break; 8689 } 8690 8691 // Lists contain the same elements. 8692 if (CI == CE && SI == SE) 8693 return false; 8694 8695 // List with less elements is less than list with more elements. 8696 if (CI == CE || SI == SE) 8697 return CI == CE; 8698 8699 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8700 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8701 if (FD1->getParent() == FD2->getParent()) 8702 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8703 const auto It = 8704 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8705 return FD == FD1 || FD == FD2; 8706 }); 8707 return *It == FD1; 8708 }); 8709 } 8710 8711 // Associated with a capture, because the mapping flags depend on it. 8712 // Go through all of the elements with the overlapped elements. 
8713 for (const auto &Pair : OverlappedData) { 8714 const MapData &L = *Pair.getFirst(); 8715 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8716 OpenMPMapClauseKind MapType; 8717 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8718 bool IsImplicit; 8719 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8720 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8721 OverlappedComponents = Pair.getSecond(); 8722 bool IsFirstComponentList = true; 8723 generateInfoForComponentList(MapType, MapModifiers, Components, 8724 BasePointers, Pointers, Sizes, Types, 8725 PartialStruct, IsFirstComponentList, 8726 IsImplicit, OverlappedComponents); 8727 } 8728 // Go through other elements without overlapped elements. 8729 bool IsFirstComponentList = OverlappedData.empty(); 8730 for (const MapData &L : DeclComponentLists) { 8731 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8732 OpenMPMapClauseKind MapType; 8733 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8734 bool IsImplicit; 8735 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8736 auto It = OverlappedData.find(&L); 8737 if (It == OverlappedData.end()) 8738 generateInfoForComponentList(MapType, MapModifiers, Components, 8739 BasePointers, Pointers, Sizes, Types, 8740 PartialStruct, IsFirstComponentList, 8741 IsImplicit); 8742 IsFirstComponentList = false; 8743 } 8744 } 8745 8746 /// Generate the base pointers, section pointers, sizes and map types 8747 /// associated with the declare target link variables. 
8748 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8749 MapValuesArrayTy &Pointers, 8750 MapValuesArrayTy &Sizes, 8751 MapFlagsArrayTy &Types) const { 8752 assert(CurDir.is<const OMPExecutableDirective *>() && 8753 "Expect a executable directive"); 8754 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8755 // Map other list items in the map clause which are not captured variables 8756 // but "declare target link" global variables. 8757 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8758 for (const auto L : C->component_lists()) { 8759 if (!L.first) 8760 continue; 8761 const auto *VD = dyn_cast<VarDecl>(L.first); 8762 if (!VD) 8763 continue; 8764 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8765 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8766 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8767 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8768 continue; 8769 StructRangeInfoTy PartialStruct; 8770 generateInfoForComponentList( 8771 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8772 Pointers, Sizes, Types, PartialStruct, 8773 /*IsFirstComponentList=*/true, C->isImplicit()); 8774 assert(!PartialStruct.Base.isValid() && 8775 "No partial structs for declare target link expected."); 8776 } 8777 } 8778 } 8779 8780 /// Generate the default map information for a given capture \a CI, 8781 /// record field declaration \a RI and captured value \a CV. 8782 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8783 const FieldDecl &RI, llvm::Value *CV, 8784 MapBaseValuesArrayTy &CurBasePointers, 8785 MapValuesArrayTy &CurPointers, 8786 MapValuesArrayTy &CurSizes, 8787 MapFlagsArrayTy &CurMapTypes) const { 8788 bool IsImplicit = true; 8789 // Do the default mapping. 
8790 if (CI.capturesThis()) { 8791 CurBasePointers.push_back(CV); 8792 CurPointers.push_back(CV); 8793 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8794 CurSizes.push_back( 8795 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8796 CGF.Int64Ty, /*isSigned=*/true)); 8797 // Default map type. 8798 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8799 } else if (CI.capturesVariableByCopy()) { 8800 CurBasePointers.push_back(CV); 8801 CurPointers.push_back(CV); 8802 if (!RI.getType()->isAnyPointerType()) { 8803 // We have to signal to the runtime captures passed by value that are 8804 // not pointers. 8805 CurMapTypes.push_back(OMP_MAP_LITERAL); 8806 CurSizes.push_back(CGF.Builder.CreateIntCast( 8807 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8808 } else { 8809 // Pointers are implicitly mapped with a zero size and no flags 8810 // (other than first map that is added for all implicit maps). 8811 CurMapTypes.push_back(OMP_MAP_NONE); 8812 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8813 } 8814 const VarDecl *VD = CI.getCapturedVar(); 8815 auto I = FirstPrivateDecls.find(VD); 8816 if (I != FirstPrivateDecls.end()) 8817 IsImplicit = I->getSecond(); 8818 } else { 8819 assert(CI.capturesVariable() && "Expected captured reference."); 8820 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8821 QualType ElementType = PtrTy->getPointeeType(); 8822 CurSizes.push_back(CGF.Builder.CreateIntCast( 8823 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8824 // The default map type for a scalar/complex type is 'to' because by 8825 // default the value doesn't have to be retrieved. For an aggregate 8826 // type, the default is 'tofrom'. 
8827 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8828 const VarDecl *VD = CI.getCapturedVar(); 8829 auto I = FirstPrivateDecls.find(VD); 8830 if (I != FirstPrivateDecls.end() && 8831 VD->getType().isConstant(CGF.getContext())) { 8832 llvm::Constant *Addr = 8833 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8834 // Copy the value of the original variable to the new global copy. 8835 CGF.Builder.CreateMemCpy( 8836 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8837 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8838 CurSizes.back(), /*IsVolatile=*/false); 8839 // Use new global variable as the base pointers. 8840 CurBasePointers.push_back(Addr); 8841 CurPointers.push_back(Addr); 8842 } else { 8843 CurBasePointers.push_back(CV); 8844 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8845 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8846 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8847 AlignmentSource::Decl)); 8848 CurPointers.push_back(PtrAddr.getPointer()); 8849 } else { 8850 CurPointers.push_back(CV); 8851 } 8852 } 8853 if (I != FirstPrivateDecls.end()) 8854 IsImplicit = I->getSecond(); 8855 } 8856 // Every default map produces a single argument which is a target parameter. 8857 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8858 8859 // Add flag stating this is an implicit map. 8860 if (IsImplicit) 8861 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8862 } 8863 }; 8864 } // anonymous namespace 8865 8866 /// Emit the arrays used to pass the captures and map information to the 8867 /// offloading runtime library. If there is no map or capture information, 8868 /// return nullptr by reference. 
8869 static void 8870 emitOffloadingArrays(CodeGenFunction &CGF, 8871 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8872 MappableExprsHandler::MapValuesArrayTy &Pointers, 8873 MappableExprsHandler::MapValuesArrayTy &Sizes, 8874 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8875 CGOpenMPRuntime::TargetDataInfo &Info) { 8876 CodeGenModule &CGM = CGF.CGM; 8877 ASTContext &Ctx = CGF.getContext(); 8878 8879 // Reset the array information. 8880 Info.clearArrayInfo(); 8881 Info.NumberOfPtrs = BasePointers.size(); 8882 8883 if (Info.NumberOfPtrs) { 8884 // Detect if we have any capture size requiring runtime evaluation of the 8885 // size so that a constant array could be eventually used. 8886 bool hasRuntimeEvaluationCaptureSize = false; 8887 for (llvm::Value *S : Sizes) 8888 if (!isa<llvm::Constant>(S)) { 8889 hasRuntimeEvaluationCaptureSize = true; 8890 break; 8891 } 8892 8893 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8894 QualType PointerArrayType = Ctx.getConstantArrayType( 8895 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8896 /*IndexTypeQuals=*/0); 8897 8898 Info.BasePointersArray = 8899 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8900 Info.PointersArray = 8901 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8902 8903 // If we don't have any VLA types or other types that require runtime 8904 // evaluation, we can use a constant array for the map sizes, otherwise we 8905 // need to fill up the arrays as we do for the pointers. 
8906 QualType Int64Ty = 8907 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8908 if (hasRuntimeEvaluationCaptureSize) { 8909 QualType SizeArrayType = Ctx.getConstantArrayType( 8910 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8911 /*IndexTypeQuals=*/0); 8912 Info.SizesArray = 8913 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8914 } else { 8915 // We expect all the sizes to be constant, so we collect them to create 8916 // a constant array. 8917 SmallVector<llvm::Constant *, 16> ConstSizes; 8918 for (llvm::Value *S : Sizes) 8919 ConstSizes.push_back(cast<llvm::Constant>(S)); 8920 8921 auto *SizesArrayInit = llvm::ConstantArray::get( 8922 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8923 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8924 auto *SizesArrayGbl = new llvm::GlobalVariable( 8925 CGM.getModule(), SizesArrayInit->getType(), 8926 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8927 SizesArrayInit, Name); 8928 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8929 Info.SizesArray = SizesArrayGbl; 8930 } 8931 8932 // The map types are always constant so we don't need to generate code to 8933 // fill arrays. Instead, we create an array constant. 
8934 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8935 llvm::copy(MapTypes, Mapping.begin()); 8936 llvm::Constant *MapTypesArrayInit = 8937 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8938 std::string MaptypesName = 8939 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8940 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8941 CGM.getModule(), MapTypesArrayInit->getType(), 8942 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8943 MapTypesArrayInit, MaptypesName); 8944 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8945 Info.MapTypesArray = MapTypesArrayGbl; 8946 8947 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8948 llvm::Value *BPVal = *BasePointers[I]; 8949 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8950 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8951 Info.BasePointersArray, 0, I); 8952 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8953 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8954 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8955 CGF.Builder.CreateStore(BPVal, BPAddr); 8956 8957 if (Info.requiresDevicePointerInfo()) 8958 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8959 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8960 8961 llvm::Value *PVal = Pointers[I]; 8962 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8963 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8964 Info.PointersArray, 0, I); 8965 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8966 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8967 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8968 CGF.Builder.CreateStore(PVal, PAddr); 8969 8970 if (hasRuntimeEvaluationCaptureSize) { 8971 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8972 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8973 Info.SizesArray, 8974 /*Idx0=*/0, 8975 /*Idx1=*/I); 8976 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8977 CGF.Builder.CreateStore( 8978 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8979 SAddr); 8980 } 8981 } 8982 } 8983 } 8984 8985 /// Emit the arguments to be passed to the runtime library based on the 8986 /// arrays of pointers, sizes and map types. 8987 static void emitOffloadingArraysArgument( 8988 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8989 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8990 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8991 CodeGenModule &CGM = CGF.CGM; 8992 if (Info.NumberOfPtrs) { 8993 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8994 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8995 Info.BasePointersArray, 8996 /*Idx0=*/0, /*Idx1=*/0); 8997 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8998 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8999 Info.PointersArray, 9000 /*Idx0=*/0, 9001 /*Idx1=*/0); 9002 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9003 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 9004 /*Idx0=*/0, /*Idx1=*/0); 9005 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 9006 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 9007 Info.MapTypesArray, 9008 /*Idx0=*/0, 9009 /*Idx1=*/0); 9010 } else { 9011 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9012 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 9013 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9014 MapTypesArrayArg = 9015 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 9016 } 9017 } 9018 9019 /// Check for inner distribute directive. 
9020 static const OMPExecutableDirective * 9021 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9022 const auto *CS = D.getInnermostCapturedStmt(); 9023 const auto *Body = 9024 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9025 const Stmt *ChildStmt = 9026 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9027 9028 if (const auto *NestedDir = 9029 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9030 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9031 switch (D.getDirectiveKind()) { 9032 case OMPD_target: 9033 if (isOpenMPDistributeDirective(DKind)) 9034 return NestedDir; 9035 if (DKind == OMPD_teams) { 9036 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9037 /*IgnoreCaptured=*/true); 9038 if (!Body) 9039 return nullptr; 9040 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9041 if (const auto *NND = 9042 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9043 DKind = NND->getDirectiveKind(); 9044 if (isOpenMPDistributeDirective(DKind)) 9045 return NND; 9046 } 9047 } 9048 return nullptr; 9049 case OMPD_target_teams: 9050 if (isOpenMPDistributeDirective(DKind)) 9051 return NestedDir; 9052 return nullptr; 9053 case OMPD_target_parallel: 9054 case OMPD_target_simd: 9055 case OMPD_target_parallel_for: 9056 case OMPD_target_parallel_for_simd: 9057 return nullptr; 9058 case OMPD_target_teams_distribute: 9059 case OMPD_target_teams_distribute_simd: 9060 case OMPD_target_teams_distribute_parallel_for: 9061 case OMPD_target_teams_distribute_parallel_for_simd: 9062 case OMPD_parallel: 9063 case OMPD_for: 9064 case OMPD_parallel_for: 9065 case OMPD_parallel_master: 9066 case OMPD_parallel_sections: 9067 case OMPD_for_simd: 9068 case OMPD_parallel_for_simd: 9069 case OMPD_cancel: 9070 case OMPD_cancellation_point: 9071 case OMPD_ordered: 9072 case OMPD_threadprivate: 9073 case OMPD_allocate: 9074 case OMPD_task: 9075 case OMPD_simd: 9076 case OMPD_sections: 9077 
case OMPD_section: 9078 case OMPD_single: 9079 case OMPD_master: 9080 case OMPD_critical: 9081 case OMPD_taskyield: 9082 case OMPD_barrier: 9083 case OMPD_taskwait: 9084 case OMPD_taskgroup: 9085 case OMPD_atomic: 9086 case OMPD_flush: 9087 case OMPD_depobj: 9088 case OMPD_scan: 9089 case OMPD_teams: 9090 case OMPD_target_data: 9091 case OMPD_target_exit_data: 9092 case OMPD_target_enter_data: 9093 case OMPD_distribute: 9094 case OMPD_distribute_simd: 9095 case OMPD_distribute_parallel_for: 9096 case OMPD_distribute_parallel_for_simd: 9097 case OMPD_teams_distribute: 9098 case OMPD_teams_distribute_simd: 9099 case OMPD_teams_distribute_parallel_for: 9100 case OMPD_teams_distribute_parallel_for_simd: 9101 case OMPD_target_update: 9102 case OMPD_declare_simd: 9103 case OMPD_declare_variant: 9104 case OMPD_begin_declare_variant: 9105 case OMPD_end_declare_variant: 9106 case OMPD_declare_target: 9107 case OMPD_end_declare_target: 9108 case OMPD_declare_reduction: 9109 case OMPD_declare_mapper: 9110 case OMPD_taskloop: 9111 case OMPD_taskloop_simd: 9112 case OMPD_master_taskloop: 9113 case OMPD_master_taskloop_simd: 9114 case OMPD_parallel_master_taskloop: 9115 case OMPD_parallel_master_taskloop_simd: 9116 case OMPD_requires: 9117 case OMPD_unknown: 9118 llvm_unreachable("Unexpected directive."); 9119 } 9120 } 9121 9122 return nullptr; 9123 } 9124 9125 /// Emit the user-defined mapper function. The code generation follows the 9126 /// pattern in the example below. 9127 /// \code 9128 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9129 /// void *base, void *begin, 9130 /// int64_t size, int64_t type) { 9131 /// // Allocate space for an array section first. 9132 /// if (size > 1 && !maptype.IsDelete) 9133 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9134 /// size*sizeof(Ty), clearToFrom(type)); 9135 /// // Map members. 
9136 /// for (unsigned i = 0; i < size; i++) { 9137 /// // For each component specified by this mapper: 9138 /// for (auto c : all_components) { 9139 /// if (c.hasMapper()) 9140 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9141 /// c.arg_type); 9142 /// else 9143 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9144 /// c.arg_begin, c.arg_size, c.arg_type); 9145 /// } 9146 /// } 9147 /// // Delete the array section. 9148 /// if (size > 1 && maptype.IsDelete) 9149 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9150 /// size*sizeof(Ty), clearToFrom(type)); 9151 /// } 9152 /// \endcode 9153 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9154 CodeGenFunction *CGF) { 9155 if (UDMMap.count(D) > 0) 9156 return; 9157 ASTContext &C = CGM.getContext(); 9158 QualType Ty = D->getType(); 9159 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9160 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9161 auto *MapperVarDecl = 9162 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9163 SourceLocation Loc = D->getLocation(); 9164 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9165 9166 // Prepare mapper function arguments and attributes. 
9167 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9168 C.VoidPtrTy, ImplicitParamDecl::Other); 9169 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9170 ImplicitParamDecl::Other); 9171 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9172 C.VoidPtrTy, ImplicitParamDecl::Other); 9173 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9174 ImplicitParamDecl::Other); 9175 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9176 ImplicitParamDecl::Other); 9177 FunctionArgList Args; 9178 Args.push_back(&HandleArg); 9179 Args.push_back(&BaseArg); 9180 Args.push_back(&BeginArg); 9181 Args.push_back(&SizeArg); 9182 Args.push_back(&TypeArg); 9183 const CGFunctionInfo &FnInfo = 9184 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9185 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9186 SmallString<64> TyStr; 9187 llvm::raw_svector_ostream Out(TyStr); 9188 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9189 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9190 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9191 Name, &CGM.getModule()); 9192 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9193 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9194 // Start the mapper function code generation. 9195 CodeGenFunction MapperCGF(CGM); 9196 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9197 // Compute the starting and end addreses of array elements. 
9198 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9199 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9200 C.getPointerType(Int64Ty), Loc); 9201 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9202 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 9203 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 9204 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 9205 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9206 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9207 C.getPointerType(Int64Ty), Loc); 9208 // Prepare common arguments for array initiation and deletion. 9209 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9210 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9211 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9212 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9213 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9214 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9215 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9216 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9217 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9218 9219 // Emit array initiation if this is an array section and \p MapType indicates 9220 // that memory allocation is required. 9221 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9222 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9223 ElementSize, HeadBB, /*IsInit=*/true); 9224 9225 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9226 9227 // Emit the loop header block. 9228 MapperCGF.EmitBlock(HeadBB); 9229 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9230 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9231 // Evaluate whether the initial condition is satisfied. 
9232 llvm::Value *IsEmpty = 9233 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9234 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9235 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9236 9237 // Emit the loop body block. 9238 MapperCGF.EmitBlock(BodyBB); 9239 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9240 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9241 PtrPHI->addIncoming(PtrBegin, EntryBB); 9242 Address PtrCurrent = 9243 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9244 .getAlignment() 9245 .alignmentOfArrayElement(ElementSize)); 9246 // Privatize the declared variable of mapper to be the current array element. 9247 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9248 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9249 return MapperCGF 9250 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9251 .getAddress(MapperCGF); 9252 }); 9253 (void)Scope.Privatize(); 9254 9255 // Get map clause information. Fill up the arrays with all mapped variables. 9256 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9257 MappableExprsHandler::MapValuesArrayTy Pointers; 9258 MappableExprsHandler::MapValuesArrayTy Sizes; 9259 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9260 MappableExprsHandler MEHandler(*D, MapperCGF); 9261 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 9262 9263 // Call the runtime API __tgt_mapper_num_components to get the number of 9264 // pre-existing components. 9265 llvm::Value *OffloadingArgs[] = {Handle}; 9266 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9267 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 9268 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9269 PreviousSize, 9270 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9271 9272 // Fill up the runtime mapper handle for all components. 
9273 for (unsigned I = 0; I < BasePointers.size(); ++I) { 9274 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9275 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9276 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9277 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9278 llvm::Value *CurSizeArg = Sizes[I]; 9279 9280 // Extract the MEMBER_OF field from the map type. 9281 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9282 MapperCGF.EmitBlock(MemberBB); 9283 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 9284 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9285 OriMapType, 9286 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9287 llvm::BasicBlock *MemberCombineBB = 9288 MapperCGF.createBasicBlock("omp.member.combine"); 9289 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9290 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9291 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9292 // Add the number of pre-existing components to the MEMBER_OF field if it 9293 // is valid. 9294 MapperCGF.EmitBlock(MemberCombineBB); 9295 llvm::Value *CombinedMember = 9296 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9297 // Do nothing if it is not a member of previous components. 9298 MapperCGF.EmitBlock(TypeBB); 9299 llvm::PHINode *MemberMapType = 9300 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9301 MemberMapType->addIncoming(OriMapType, MemberBB); 9302 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9303 9304 // Combine the map type inherited from user-defined mapper with that 9305 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9306 // bits of the \a MapType, which is the input argument of the mapper 9307 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9308 // bits of MemberMapType. 
9309 // [OpenMP 5.0], 1.2.6. map-type decay. 9310 // | alloc | to | from | tofrom | release | delete 9311 // ---------------------------------------------------------- 9312 // alloc | alloc | alloc | alloc | alloc | release | delete 9313 // to | alloc | to | alloc | to | release | delete 9314 // from | alloc | alloc | from | from | release | delete 9315 // tofrom | alloc | to | from | tofrom | release | delete 9316 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9317 MapType, 9318 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9319 MappableExprsHandler::OMP_MAP_FROM)); 9320 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9321 llvm::BasicBlock *AllocElseBB = 9322 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9323 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9324 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9325 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9326 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9327 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9328 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9329 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9330 MapperCGF.EmitBlock(AllocBB); 9331 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9332 MemberMapType, 9333 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9334 MappableExprsHandler::OMP_MAP_FROM))); 9335 MapperCGF.Builder.CreateBr(EndBB); 9336 MapperCGF.EmitBlock(AllocElseBB); 9337 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9338 LeftToFrom, 9339 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9340 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9341 // In case of to, clear OMP_MAP_FROM. 
9342 MapperCGF.EmitBlock(ToBB); 9343 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9344 MemberMapType, 9345 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9346 MapperCGF.Builder.CreateBr(EndBB); 9347 MapperCGF.EmitBlock(ToElseBB); 9348 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9349 LeftToFrom, 9350 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9351 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9352 // In case of from, clear OMP_MAP_TO. 9353 MapperCGF.EmitBlock(FromBB); 9354 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9355 MemberMapType, 9356 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9357 // In case of tofrom, do nothing. 9358 MapperCGF.EmitBlock(EndBB); 9359 llvm::PHINode *CurMapType = 9360 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9361 CurMapType->addIncoming(AllocMapType, AllocBB); 9362 CurMapType->addIncoming(ToMapType, ToBB); 9363 CurMapType->addIncoming(FromMapType, FromBB); 9364 CurMapType->addIncoming(MemberMapType, ToElseBB); 9365 9366 // TODO: call the corresponding mapper function if a user-defined mapper is 9367 // associated with this map clause. 9368 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9369 // data structure. 9370 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9371 CurSizeArg, CurMapType}; 9372 MapperCGF.EmitRuntimeCall( 9373 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9374 OffloadingArgs); 9375 } 9376 9377 // Update the pointer to point to the next element that needs to be mapped, 9378 // and check whether we have mapped all elements. 
9379 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9380 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9381 PtrPHI->addIncoming(PtrNext, BodyBB); 9382 llvm::Value *IsDone = 9383 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9384 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9385 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9386 9387 MapperCGF.EmitBlock(ExitBB); 9388 // Emit array deletion if this is an array section and \p MapType indicates 9389 // that deletion is required. 9390 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9391 ElementSize, DoneBB, /*IsInit=*/false); 9392 9393 // Emit the function exit block. 9394 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9395 MapperCGF.FinishFunction(); 9396 UDMMap.try_emplace(D, Fn); 9397 if (CGF) { 9398 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9399 Decls.second.push_back(D); 9400 } 9401 } 9402 9403 /// Emit the array initialization or deletion portion for user-defined mapper 9404 /// code generation. First, it evaluates whether an array section is mapped and 9405 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9406 /// true, and \a MapType indicates to not delete this array, array 9407 /// initialization code is generated. If \a IsInit is false, and \a MapType 9408 /// indicates to not this array, array deletion code is generated. 9409 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9410 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9411 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9412 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9413 StringRef Prefix = IsInit ? ".init" : ".del"; 9414 9415 // Evaluate if this is an array section. 
9416 llvm::BasicBlock *IsDeleteBB = 9417 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); 9418 llvm::BasicBlock *BodyBB = 9419 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9420 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9421 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9422 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9423 9424 // Evaluate if we are going to delete this section. 9425 MapperCGF.EmitBlock(IsDeleteBB); 9426 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9427 MapType, 9428 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9429 llvm::Value *DeleteCond; 9430 if (IsInit) { 9431 DeleteCond = MapperCGF.Builder.CreateIsNull( 9432 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9433 } else { 9434 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9435 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9436 } 9437 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9438 9439 MapperCGF.EmitBlock(BodyBB); 9440 // Get the array size by multiplying element size and element number (i.e., \p 9441 // Size). 9442 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9443 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9444 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9445 // memory allocation/deletion purpose only. 9446 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9447 MapType, 9448 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9449 MappableExprsHandler::OMP_MAP_FROM))); 9450 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9451 // data structure. 
9452 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9453 MapperCGF.EmitRuntimeCall( 9454 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9455 } 9456 9457 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9458 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9459 llvm::Value *DeviceID, 9460 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9461 const OMPLoopDirective &D)> 9462 SizeEmitter) { 9463 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9464 const OMPExecutableDirective *TD = &D; 9465 // Get nested teams distribute kind directive, if any. 9466 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9467 TD = getNestedDistributeDirective(CGM.getContext(), D); 9468 if (!TD) 9469 return; 9470 const auto *LD = cast<OMPLoopDirective>(TD); 9471 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9472 PrePostActionTy &) { 9473 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9474 llvm::Value *Args[] = {DeviceID, NumIterations}; 9475 CGF.EmitRuntimeCall( 9476 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9477 } 9478 }; 9479 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9480 } 9481 9482 void CGOpenMPRuntime::emitTargetCall( 9483 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9484 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9485 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 9486 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9487 const OMPLoopDirective &D)> 9488 SizeEmitter) { 9489 if (!CGF.HaveInsertPoint()) 9490 return; 9491 9492 assert(OutlinedFn && "Invalid outlined function!"); 9493 9494 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9495 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9496 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9497 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9498 
// (Still inside the header of the ArgsCodegen lambda opened on the previous
// line; the pre/post action parameter is unnamed because it is not used.)
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  // Populate CapturedVars by running ArgsCodegen as an inlined region.
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen below and read
  // by ThenGen, which captures both by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        // Regenerate the captured variables in the current function context
        // before calling the outlined function.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any. Without a device clause expression the
    // OMP_DEVICEID_UNDEF constant is passed instead.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call. It is assigned on both
    // branches of the NumTeams test below.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      // No enclosed teams region: use the plain __tgt_target entry point,
      // which takes neither a team count nor a thread limit.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required. A
    // non-zero return value is treated as failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    // Failure path: call the host version of the outlined function.
    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: compute the map information for every capture, emit the
  // offloading arrays, then run ThenGen (inside a task when a depend clause is
  // present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the fields of the captured record and the captured
    // values in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least one element of information for this
      // capture, and the four per-capture arrays must stay the same length.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambda captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the results to the enclosing function's variables so that
    // ThenGen can read them.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen, inside a task when a depend clause is
  // present. The task variant is given a default-constructed InputInfo.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Walk the statement tree rooted at \p S and emit the device function for
/// each target execution directive that has a registered target region entry.
///
/// \param S Statement to scan; a null statement is ignored.
/// \param ParentName Name used together with the directive's location to look
/// up the target region entry; callers in this file pass the mangled name of
/// the enclosing function.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
// Only an executable directive whose kind is a target execution directive
// needs a device function.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // The directive's begin location yields the (device, file, line) triple
    // that, together with ParentName, identifies its target region entry.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the kinds below is handled as a target execution directive
    // here; reaching one trips the llvm_unreachable. They are spelled out
    // rather than folded into a `default:` label (NOTE(review): presumably so
    // that a newly added directive kind triggers a -Wswitch warning here).
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    // A target directive's own body is not scanned further from here.
    return;
  }

  // Any other executable directive: continue the scan inside its innermost
  // captured statement, if it has an associated statement at all.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Handle the function-like declaration \p GD for target code generation.
///
/// Going by the comments on the individual return statements, a result of
/// true means the declaration must not be emitted and false means normal
/// emission proceeds. When compiling for the device, the function body is
/// additionally scanned for target regions.
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
9944 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9945 return true; 9946 } 9947 9948 // Do not to emit function if it is not marked as declare target. 9949 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9950 AlreadyEmittedTargetDecls.count(VD) == 0; 9951 } 9952 9953 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9954 if (!CGM.getLangOpts().OpenMPIsDevice) 9955 return false; 9956 9957 // Check if there are Ctors/Dtors in this declaration and look for target 9958 // regions in it. We use the complete variant to produce the kernel name 9959 // mangling. 9960 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9961 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9962 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9963 StringRef ParentName = 9964 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9965 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9966 } 9967 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9968 StringRef ParentName = 9969 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9970 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9971 } 9972 } 9973 9974 // Do not to emit variable if it is not marked as declare target. 
9975 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9976 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9977 cast<VarDecl>(GD.getDecl())); 9978 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9979 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9980 HasRequiresUnifiedSharedMemory)) { 9981 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9982 return true; 9983 } 9984 return false; 9985 } 9986 9987 llvm::Constant * 9988 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9989 const VarDecl *VD) { 9990 assert(VD->getType().isConstant(CGM.getContext()) && 9991 "Expected constant variable."); 9992 StringRef VarName; 9993 llvm::Constant *Addr; 9994 llvm::GlobalValue::LinkageTypes Linkage; 9995 QualType Ty = VD->getType(); 9996 SmallString<128> Buffer; 9997 { 9998 unsigned DeviceID; 9999 unsigned FileID; 10000 unsigned Line; 10001 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10002 FileID, Line); 10003 llvm::raw_svector_ostream OS(Buffer); 10004 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10005 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10006 VarName = OS.str(); 10007 } 10008 Linkage = llvm::GlobalValue::InternalLinkage; 10009 Addr = 10010 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10011 getDefaultFirstprivateAddressSpace()); 10012 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10013 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10014 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10015 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10016 VarName, Addr, VarSize, 10017 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10018 return Addr; 10019 } 10020 10021 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10022 llvm::Constant *Addr) { 10023 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10024 !CGM.getLangOpts().OpenMPIsDevice) 10025 
// (Body of the guard begun on the previous line: there is nothing to register
// when no offload target triples are set and this is not a device compile.)
    return;
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables. The four values below are filled in by
  // whichever branch applies and then passed to the entries manager.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // Declare target 'to' without unified shared memory: register the variable
    // itself under its mangled name.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    // A variable that is only declared here is registered with size zero.
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables: on the
    // device, a variable that is not externally visible gets a constant
    // internal "ref" global initialized with its address and added to the
    // compiler-used list.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // Declare target 'link', or 'to' under unified shared memory: what gets
    // registered is a pointer-sized entry with weak linkage.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device only the name is registered; no address is passed.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      // On the host, register the name and address returned by
      // getAddrOfDeclareTargetVar().
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

/// Dispatch target handling of \p GD: function declarations and declare
/// reduction declarations go to emitTargetFunctions(), everything else to
/// emitTargetGlobalVariable(). The callee's result is returned unchanged.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

/// Process the variables collected in DeferredGlobalVariables: declare target
/// 'to' variables (without unified shared memory) are emitted as globals,
/// while 'link' variables and 'to' variables under unified shared memory only
/// get their declare target address created. Variables without a declare
/// target mapping are skipped.
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
continue;
    // 'to' entries without unified shared memory are emitted as regular
    // globals; 'link' entries (and 'to' entries under unified shared memory)
    // are only materialized through their declare-target address.
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

/// Adjust target-specific data for lambdas used by the directive \p D.
/// This implementation emits nothing: it only checks (in asserts builds) that
/// \p D is a target execution directive.
/// NOTE(review): presumably a hook specialized by device-specific runtimes --
/// confirm against the class declaration.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

/// Record the effect of the clauses of a 'requires' declaration \p D:
///  - 'unified_shared_memory' sets HasRequiresUnifiedSharedMemory;
///  - 'atomic_default_mem_order' updates RequiresAtomicOrdering.
/// Other clauses are ignored here.
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      // Translate the OpenMP memory-order kind into the LLVM atomic ordering.
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        // Leave the previously recorded ordering untouched.
        break;
      }
    }
  }
}

/// \return the atomic ordering recorded by processRequiresDirective().
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

/// Check whether the global variable \p VD carries an 'omp allocate'
/// attribute and, if so, report the address space to use for it in \p AS.
///
/// \return false if \p VD is null or has no OMPAllocateDeclAttr (\p AS is not
/// written). For every predefined allocator \p AS is set to LangAS::Default
/// and true is returned. A user-defined allocator is treated as unreachable.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

/// \return true if a 'requires unified_shared_memory' clause has been
/// processed by processRequiresDirective().
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

/// On device compilations, save the runtime's ShouldMarkAsGlobal flag and
/// clear it for the lifetime of this object. Does nothing on the host.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

/// On device compilations, restore the ShouldMarkAsGlobal flag saved by the
/// constructor.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

/// Track the function \p GD in AlreadyEmittedTargetDecls during device
/// compilation.
///
/// \return true immediately when not compiling for the device or when
/// ShouldMarkAsGlobal is off. For declare-target functions nothing is
/// recorded: the result is true unless the function has a body, is not yet
/// recorded, and no defined llvm::Function exists for its mangled name. For
/// any other function the declaration is inserted into the set and the result
/// is true only if it was already present.
/// NOTE(review): the caller-side meaning of the boolean is not visible in
/// this chunk -- confirm against the declaration in the header.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      // Look the function up by mangled name; a mere declaration (or no
      // llvm::Function at all) yields false.
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

/// Create the global init function that calls __tgt_register_requires with
/// the flags collected from 'requires' directives.
///
/// \return the created function, or nullptr when there are no offload target
/// triples, in simd-only or device compilations, or when no offload entries,
/// target regions or declare target regions have been emitted.
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

/// Emit a call to __kmpc_fork_teams for the outlined function \p OutlinedFn,
/// forwarding \p CapturedVars as the trailing variadic arguments. Does
/// nothing if \p CGF has no insertion point.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emit a call to __kmpc_push_num_teams with the values of the 'num_teams'
/// and 'thread_limit' expressions. Each value is cast to a signed 32-bit
/// integer; a null expression is passed as the constant 0. Does nothing if
/// \p CGF has no insertion point.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

/// Emit the code for a 'target data' region: a __tgt_target_data_begin call,
/// the region body produced by \p CodeGen, and a __tgt_target_data_end call.
///
/// \param IfCond Optional 'if' clause condition; when present both runtime
/// calls are guarded by it through emitIfClause().
/// \param Device Optional 'device' clause expression; when null
/// OMP_DEVICEID_UNDEF is passed to the runtime.
/// \param Info Receives the offloading arrays built for the begin call; the
/// same arrays are reused by the end call.
///
/// If Info.CaptureDeviceAddrMap is non-empty the body is emitted inside the
/// 'then' branch (as-is) and inside the 'else' branch (with NoPrivAction);
/// otherwise it is emitted once between the two runtime calls.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Opening of the data environment.
  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Closing of the data environment.
  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emit the runtime call for a standalone data directive ('target enter
/// data', 'target exit data' or 'target update').
///
/// The offloading arrays are built from the map information of \p D, then the
/// matching __tgt_target_data_{begin,end,update}[_nowait] entry point is
/// called. With a 'depend' clause the call is emitted through
/// EmitOMPTargetTaskBasedDirective(), otherwise as an inlined directive.
///
/// \param IfCond Optional 'if' clause condition; when it is false nothing is
/// emitted.
/// \param Device Optional 'device' clause expression; when null
/// OMP_DEVICEID_UNDEF is passed to the runtime.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen below and read by ThenGen when it runs.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // Every other directive kind is listed explicitly (there is no 'default'
    // label) and is rejected as unreachable.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the arrays to ThenGen through the captured InputInfo and
    // MapTypesArray.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // Nothing is emitted when the 'if' clause evaluates to false.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
  /// Kind of parameter in a function with 'declare simd' directive.
  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
  /// Attribute set of the parameter.
  struct ParamAttrTy {
    /// Classification of the parameter; parameters default to Vector.
    ParamKindTy Kind = Vector;
    /// Constant step for Linear, or the position of the parameter holding the
    /// step for LinearWithVarStride.
    llvm::APSInt StrideOrArg;
    /// Alignment taken from the 'aligned' clause; zero when not specified.
    llvm::APSInt Alignment;
  };
} // namespace

/// Compute the size, as reported by ASTContext::getTypeSize(), of the
/// "characteristic data type" (CDT) of \p FD. The caller divides the vector
/// register size by this value to obtain the vector length.
///
/// \return 0 if the return type of \p FD is null, the CDT size otherwise.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  // NOTE(review): the isNull() test below is redundant after the early return
  // above.
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    // For C++ methods ParamAttrs[0] describes 'this'; explicit parameters
    // start at index Offset.
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      // Take the type of the first parameter classified as Vector.
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  // Record and union types fall back to int (rule c above).
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Attach the x86 vector-variant names of a 'declare simd' function to \p Fn
/// as string function attributes.
///
/// One attribute of the form
///   _ZGV<isa><mask><vlen><param-codes>_<name of Fn>
/// is added for every ISA in the table below and every mask implied by
/// \p State ('N' and 'M' when the branch state is undefined).
///
/// \param VLENVal The 'simdlen' value; when zero the vector length is derived
/// from the vector register size and evaluateCDTSize().
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No simdlen: VLEN = vector register size / CDT size.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      // One code per parameter: 's'<pos>, 'l'[<step>], 'u' or 'v', optionally
      // followed by 'a'<alignment>.
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (!!ParamAttr.StrideOrArg)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10739 10740 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10741 /// 10742 /// TODO: Need to implement the behavior for reference marked with a 10743 /// var or no linear modifiers (1.b in the section). For this, we 10744 /// need to extend ParamKindTy to support the linear modifiers. 10745 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10746 QT = QT.getCanonicalType(); 10747 10748 if (QT->isVoidType()) 10749 return false; 10750 10751 if (Kind == ParamKindTy::Uniform) 10752 return false; 10753 10754 if (Kind == ParamKindTy::Linear) 10755 return false; 10756 10757 // TODO: Handle linear references with modifiers 10758 10759 if (Kind == ParamKindTy::LinearWithVarStride) 10760 return false; 10761 10762 return true; 10763 } 10764 10765 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10766 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10767 QT = QT.getCanonicalType(); 10768 unsigned Size = C.getTypeSize(QT); 10769 10770 // Only scalars and complex within 16 bytes wide set PVB to true. 10771 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10772 return false; 10773 10774 if (QT->isFloatingType()) 10775 return true; 10776 10777 if (QT->isIntegerType()) 10778 return true; 10779 10780 if (QT->isPointerType()) 10781 return true; 10782 10783 // TODO: Add support for complex types (section 3.1.2, item 2). 10784 10785 return false; 10786 } 10787 10788 /// Computes the lane size (LS) of a return type or of an input parameter, 10789 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10790 /// TODO: Add support for references, section 3.2.1, item 1. 
10791 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10792 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10793 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10794 if (getAArch64PBV(PTy, C)) 10795 return C.getTypeSize(PTy); 10796 } 10797 if (getAArch64PBV(QT, C)) 10798 return C.getTypeSize(QT); 10799 10800 return C.getTypeSize(C.getUIntPtrType()); 10801 } 10802 10803 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10804 // signature of the scalar function, as defined in 3.2.2 of the 10805 // AAVFABI. 10806 static std::tuple<unsigned, unsigned, bool> 10807 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10808 QualType RetType = FD->getReturnType().getCanonicalType(); 10809 10810 ASTContext &C = FD->getASTContext(); 10811 10812 bool OutputBecomesInput = false; 10813 10814 llvm::SmallVector<unsigned, 8> Sizes; 10815 if (!RetType->isVoidType()) { 10816 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10817 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10818 OutputBecomesInput = true; 10819 } 10820 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10821 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10822 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10823 } 10824 10825 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10826 // The LS of a function parameter / return value can only be a power 10827 // of 2, starting from 8 bits, up to 128. 
10828 assert(std::all_of(Sizes.begin(), Sizes.end(), 10829 [](unsigned Size) { 10830 return Size == 8 || Size == 16 || Size == 32 || 10831 Size == 64 || Size == 128; 10832 }) && 10833 "Invalid size"); 10834 10835 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10836 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10837 OutputBecomesInput); 10838 } 10839 10840 /// Mangle the parameter part of the vector function name according to 10841 /// their OpenMP classification. The mangling function is defined in 10842 /// section 3.5 of the AAVFABI. 10843 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10844 SmallString<256> Buffer; 10845 llvm::raw_svector_ostream Out(Buffer); 10846 for (const auto &ParamAttr : ParamAttrs) { 10847 switch (ParamAttr.Kind) { 10848 case LinearWithVarStride: 10849 Out << "ls" << ParamAttr.StrideOrArg; 10850 break; 10851 case Linear: 10852 Out << 'l'; 10853 // Don't print the step value if it is not present or if it is 10854 // equal to 1. 10855 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10856 Out << ParamAttr.StrideOrArg; 10857 break; 10858 case Uniform: 10859 Out << 'u'; 10860 break; 10861 case Vector: 10862 Out << 'v'; 10863 break; 10864 } 10865 10866 if (!!ParamAttr.Alignment) 10867 Out << 'a' << ParamAttr.Alignment; 10868 } 10869 10870 return std::string(Out.str()); 10871 } 10872 10873 // Function used to add the attribute. The parameter `VLEN` is 10874 // templated to allow the use of "x" when targeting scalable functions 10875 // for SVE. 
10876 template <typename T> 10877 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10878 char ISA, StringRef ParSeq, 10879 StringRef MangledName, bool OutputBecomesInput, 10880 llvm::Function *Fn) { 10881 SmallString<256> Buffer; 10882 llvm::raw_svector_ostream Out(Buffer); 10883 Out << Prefix << ISA << LMask << VLEN; 10884 if (OutputBecomesInput) 10885 Out << "v"; 10886 Out << ParSeq << "_" << MangledName; 10887 Fn->addFnAttr(Out.str()); 10888 } 10889 10890 // Helper function to generate the Advanced SIMD names depending on 10891 // the value of the NDS when simdlen is not present. 10892 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10893 StringRef Prefix, char ISA, 10894 StringRef ParSeq, StringRef MangledName, 10895 bool OutputBecomesInput, 10896 llvm::Function *Fn) { 10897 switch (NDS) { 10898 case 8: 10899 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10900 OutputBecomesInput, Fn); 10901 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10902 OutputBecomesInput, Fn); 10903 break; 10904 case 16: 10905 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10906 OutputBecomesInput, Fn); 10907 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10908 OutputBecomesInput, Fn); 10909 break; 10910 case 32: 10911 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10912 OutputBecomesInput, Fn); 10913 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10914 OutputBecomesInput, Fn); 10915 break; 10916 case 64: 10917 case 128: 10918 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10919 OutputBecomesInput, Fn); 10920 break; 10921 default: 10922 llvm_unreachable("Scalar type is too wide."); 10923 } 10924 } 10925 10926 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10927 static void emitAArch64DeclareSimdFunction( 10928 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10929 ArrayRef<ParamAttrTy> ParamAttrs, 10930 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10931 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10932 10933 // Get basic data for building the vector signature. 10934 const auto Data = getNDSWDS(FD, ParamAttrs); 10935 const unsigned NDS = std::get<0>(Data); 10936 const unsigned WDS = std::get<1>(Data); 10937 const bool OutputBecomesInput = std::get<2>(Data); 10938 10939 // Check the values provided via `simdlen` by the user. 10940 // 1. A `simdlen(1)` doesn't produce vector signatures, 10941 if (UserVLEN == 1) { 10942 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10943 DiagnosticsEngine::Warning, 10944 "The clause simdlen(1) has no effect when targeting aarch64."); 10945 CGM.getDiags().Report(SLoc, DiagID); 10946 return; 10947 } 10948 10949 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10950 // Advanced SIMD output. 10951 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10952 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10953 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10954 "power of 2 when targeting Advanced SIMD."); 10955 CGM.getDiags().Report(SLoc, DiagID); 10956 return; 10957 } 10958 10959 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10960 // limits. 10961 if (ISA == 's' && UserVLEN != 0) { 10962 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10963 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10964 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10965 "lanes in the architectural constraints " 10966 "for SVE (min is 128-bit, max is " 10967 "2048-bit, by steps of 128-bit)"); 10968 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10969 return; 10970 } 10971 } 10972 10973 // Sort out parameter sequence. 
10974 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10975 StringRef Prefix = "_ZGV"; 10976 // Generate simdlen from user input (if any). 10977 if (UserVLEN) { 10978 if (ISA == 's') { 10979 // SVE generates only a masked function. 10980 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10981 OutputBecomesInput, Fn); 10982 } else { 10983 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10984 // Advanced SIMD generates one or two functions, depending on 10985 // the `[not]inbranch` clause. 10986 switch (State) { 10987 case OMPDeclareSimdDeclAttr::BS_Undefined: 10988 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10989 OutputBecomesInput, Fn); 10990 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10991 OutputBecomesInput, Fn); 10992 break; 10993 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10994 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10995 OutputBecomesInput, Fn); 10996 break; 10997 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10998 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10999 OutputBecomesInput, Fn); 11000 break; 11001 } 11002 } 11003 } else { 11004 // If no user simdlen is provided, follow the AAVFABI rules for 11005 // generating the vector length. 11006 if (ISA == 's') { 11007 // SVE, section 3.4.1, item 1. 11008 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 11009 OutputBecomesInput, Fn); 11010 } else { 11011 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 11012 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 11013 // two vector names depending on the use of the clause 11014 // `[not]inbranch`. 
11015 switch (State) { 11016 case OMPDeclareSimdDeclAttr::BS_Undefined: 11017 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11018 OutputBecomesInput, Fn); 11019 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11020 OutputBecomesInput, Fn); 11021 break; 11022 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 11023 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 11024 OutputBecomesInput, Fn); 11025 break; 11026 case OMPDeclareSimdDeclAttr::BS_Inbranch: 11027 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 11028 OutputBecomesInput, Fn); 11029 break; 11030 } 11031 } 11032 } 11033 } 11034 11035 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 11036 llvm::Function *Fn) { 11037 ASTContext &C = CGM.getContext(); 11038 FD = FD->getMostRecentDecl(); 11039 // Map params to their positions in function decl. 11040 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 11041 if (isa<CXXMethodDecl>(FD)) 11042 ParamPositions.try_emplace(FD, 0); 11043 unsigned ParamPos = ParamPositions.size(); 11044 for (const ParmVarDecl *P : FD->parameters()) { 11045 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 11046 ++ParamPos; 11047 } 11048 while (FD) { 11049 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 11050 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 11051 // Mark uniform parameters. 11052 for (const Expr *E : Attr->uniforms()) { 11053 E = E->IgnoreParenImpCasts(); 11054 unsigned Pos; 11055 if (isa<CXXThisExpr>(E)) { 11056 Pos = ParamPositions[FD]; 11057 } else { 11058 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11059 ->getCanonicalDecl(); 11060 Pos = ParamPositions[PVD]; 11061 } 11062 ParamAttrs[Pos].Kind = Uniform; 11063 } 11064 // Get alignment info. 
11065 auto NI = Attr->alignments_begin(); 11066 for (const Expr *E : Attr->aligneds()) { 11067 E = E->IgnoreParenImpCasts(); 11068 unsigned Pos; 11069 QualType ParmTy; 11070 if (isa<CXXThisExpr>(E)) { 11071 Pos = ParamPositions[FD]; 11072 ParmTy = E->getType(); 11073 } else { 11074 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11075 ->getCanonicalDecl(); 11076 Pos = ParamPositions[PVD]; 11077 ParmTy = PVD->getType(); 11078 } 11079 ParamAttrs[Pos].Alignment = 11080 (*NI) 11081 ? (*NI)->EvaluateKnownConstInt(C) 11082 : llvm::APSInt::getUnsigned( 11083 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11084 .getQuantity()); 11085 ++NI; 11086 } 11087 // Mark linear parameters. 11088 auto SI = Attr->steps_begin(); 11089 auto MI = Attr->modifiers_begin(); 11090 for (const Expr *E : Attr->linears()) { 11091 E = E->IgnoreParenImpCasts(); 11092 unsigned Pos; 11093 if (isa<CXXThisExpr>(E)) { 11094 Pos = ParamPositions[FD]; 11095 } else { 11096 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11097 ->getCanonicalDecl(); 11098 Pos = ParamPositions[PVD]; 11099 } 11100 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11101 ParamAttr.Kind = Linear; 11102 if (*SI) { 11103 Expr::EvalResult Result; 11104 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11105 if (const auto *DRE = 11106 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11107 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11108 ParamAttr.Kind = LinearWithVarStride; 11109 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11110 ParamPositions[StridePVD->getCanonicalDecl()]); 11111 } 11112 } 11113 } else { 11114 ParamAttr.StrideOrArg = Result.Val.getInt(); 11115 } 11116 } 11117 ++SI; 11118 ++MI; 11119 } 11120 llvm::APSInt VLENVal; 11121 SourceLocation ExprLoc; 11122 const Expr *VLENExpr = Attr->getSimdlen(); 11123 if (VLENExpr) { 11124 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11125 ExprLoc = VLENExpr->getExprLoc(); 11126 } 11127 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11128 if (CGM.getTriple().isX86()) { 11129 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11130 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11131 unsigned VLEN = VLENVal.getExtValue(); 11132 StringRef MangledName = Fn->getName(); 11133 if (CGM.getTarget().hasFeature("sve")) 11134 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11135 MangledName, 's', 128, Fn, ExprLoc); 11136 if (CGM.getTarget().hasFeature("neon")) 11137 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11138 MangledName, 'n', 128, Fn, ExprLoc); 11139 } 11140 } 11141 FD = FD->getPreviousDecl(); 11142 } 11143 } 11144 11145 namespace { 11146 /// Cleanup action for doacross support. 11147 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11148 public: 11149 static const int DoacrossFinArgs = 2; 11150 11151 private: 11152 llvm::FunctionCallee RTLFn; 11153 llvm::Value *Args[DoacrossFinArgs]; 11154 11155 public: 11156 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11157 ArrayRef<llvm::Value *> CallArgs) 11158 : RTLFn(RTLFn) { 11159 assert(CallArgs.size() == DoacrossFinArgs); 11160 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11161 } 11162 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11163 if (!CGF.HaveInsertPoint()) 11164 return; 11165 CGF.EmitRuntimeCall(RTLFn, Args); 11166 } 11167 }; 11168 } // namespace 11169 11170 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11171 const OMPLoopDirective &D, 11172 ArrayRef<Expr *> NumIterations) { 11173 if (!CGF.HaveInsertPoint()) 11174 return; 11175 11176 ASTContext &C = CGM.getContext(); 11177 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11178 RecordDecl *RD; 11179 if (KmpDimTy.isNull()) { 11180 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11181 // kmp_int64 lo; // lower 11182 // kmp_int64 up; // upper 11183 // 
kmp_int64 st; // stride 11184 // }; 11185 RD = C.buildImplicitRecord("kmp_dim"); 11186 RD->startDefinition(); 11187 addFieldToRecordDecl(C, RD, Int64Ty); 11188 addFieldToRecordDecl(C, RD, Int64Ty); 11189 addFieldToRecordDecl(C, RD, Int64Ty); 11190 RD->completeDefinition(); 11191 KmpDimTy = C.getRecordType(RD); 11192 } else { 11193 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11194 } 11195 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11196 QualType ArrayTy = 11197 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11198 11199 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11200 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11201 enum { LowerFD = 0, UpperFD, StrideFD }; 11202 // Fill dims with data. 11203 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11204 LValue DimsLVal = CGF.MakeAddrLValue( 11205 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11206 // dims.upper = num_iterations; 11207 LValue UpperLVal = CGF.EmitLValueForField( 11208 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11209 llvm::Value *NumIterVal = 11210 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 11211 D.getNumIterations()->getType(), Int64Ty, 11212 D.getNumIterations()->getExprLoc()); 11213 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11214 // dims.stride = 1; 11215 LValue StrideLVal = CGF.EmitLValueForField( 11216 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11217 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11218 StrideLVal); 11219 } 11220 11221 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11222 // kmp_int32 num_dims, struct kmp_dim * dims); 11223 llvm::Value *Args[] = { 11224 emitUpdateLocation(CGF, D.getBeginLoc()), 11225 getThreadID(CGF, D.getBeginLoc()), 11226 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11227 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11228 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 
11229 CGM.VoidPtrTy)}; 11230 11231 llvm::FunctionCallee RTLFn = 11232 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 11233 CGF.EmitRuntimeCall(RTLFn, Args); 11234 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11235 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11236 llvm::FunctionCallee FiniRTLFn = 11237 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 11238 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11239 llvm::makeArrayRef(FiniArgs)); 11240 } 11241 11242 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11243 const OMPDependClause *C) { 11244 QualType Int64Ty = 11245 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11246 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11247 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11248 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11249 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11250 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11251 const Expr *CounterVal = C->getLoopData(I); 11252 assert(CounterVal); 11253 llvm::Value *CntVal = CGF.EmitScalarConversion( 11254 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11255 CounterVal->getExprLoc()); 11256 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11257 /*Volatile=*/false, Int64Ty); 11258 } 11259 llvm::Value *Args[] = { 11260 emitUpdateLocation(CGF, C->getBeginLoc()), 11261 getThreadID(CGF, C->getBeginLoc()), 11262 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11263 llvm::FunctionCallee RTLFn; 11264 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11265 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 11266 } else { 11267 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11268 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 11269 } 11270 CGF.EmitRuntimeCall(RTLFn, Args); 11271 } 11272 11273 void CGOpenMPRuntime::emitCall(CodeGenFunction 
&CGF, SourceLocation Loc, 11274 llvm::FunctionCallee Callee, 11275 ArrayRef<llvm::Value *> Args) const { 11276 assert(Loc.isValid() && "Outlined function call location must be valid."); 11277 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11278 11279 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11280 if (Fn->doesNotThrow()) { 11281 CGF.EmitNounwindRuntimeCall(Fn, Args); 11282 return; 11283 } 11284 } 11285 CGF.EmitRuntimeCall(Callee, Args); 11286 } 11287 11288 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11289 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 11290 ArrayRef<llvm::Value *> Args) const { 11291 emitCall(CGF, Loc, OutlinedFn, Args); 11292 } 11293 11294 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11295 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11296 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11297 HasEmittedDeclareTargetRegion = true; 11298 } 11299 11300 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11301 const VarDecl *NativeParam, 11302 const VarDecl *TargetParam) const { 11303 return CGF.GetAddrOfLocalVar(NativeParam); 11304 } 11305 11306 namespace { 11307 /// Cleanup action for allocate support. 
11308 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11309 public: 11310 static const int CleanupArgs = 3; 11311 11312 private: 11313 llvm::FunctionCallee RTLFn; 11314 llvm::Value *Args[CleanupArgs]; 11315 11316 public: 11317 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11318 ArrayRef<llvm::Value *> CallArgs) 11319 : RTLFn(RTLFn) { 11320 assert(CallArgs.size() == CleanupArgs && 11321 "Size of arguments does not match."); 11322 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11323 } 11324 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11325 if (!CGF.HaveInsertPoint()) 11326 return; 11327 CGF.EmitRuntimeCall(RTLFn, Args); 11328 } 11329 }; 11330 } // namespace 11331 11332 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11333 const VarDecl *VD) { 11334 if (!VD) 11335 return Address::invalid(); 11336 const VarDecl *CVD = VD->getCanonicalDecl(); 11337 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11338 return Address::invalid(); 11339 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11340 // Use the default allocation. 
11341 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 11342 !AA->getAllocator()) 11343 return Address::invalid(); 11344 llvm::Value *Size; 11345 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11346 if (CVD->getType()->isVariablyModifiedType()) { 11347 Size = CGF.getTypeSize(CVD->getType()); 11348 // Align the size: ((size + align - 1) / align) * align 11349 Size = CGF.Builder.CreateNUWAdd( 11350 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11351 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11352 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11353 } else { 11354 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11355 Size = CGM.getSize(Sz.alignTo(Align)); 11356 } 11357 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11358 assert(AA->getAllocator() && 11359 "Expected allocator expression for non-default allocator."); 11360 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11361 // According to the standard, the original allocator type is a enum (integer). 11362 // Convert to pointer type, if required. 
11363 if (Allocator->getType()->isIntegerTy()) 11364 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11365 else if (Allocator->getType()->isPointerTy()) 11366 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11367 CGM.VoidPtrTy); 11368 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11369 11370 llvm::Value *Addr = 11371 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11372 getName({CVD->getName(), ".void.addr"})); 11373 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11374 Allocator}; 11375 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11376 11377 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11378 llvm::makeArrayRef(FiniArgs)); 11379 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11380 Addr, 11381 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11382 getName({CVD->getName(), ".addr"})); 11383 return Address(Addr, Align); 11384 } 11385 11386 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11387 CodeGenModule &CGM, const OMPLoopDirective &S) 11388 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11389 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11390 if (!NeedToPush) 11391 return; 11392 NontemporalDeclsSet &DS = 11393 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11394 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11395 for (const Stmt *Ref : C->private_refs()) { 11396 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11397 const ValueDecl *VD; 11398 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11399 VD = DRE->getDecl(); 11400 } else { 11401 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11402 assert((ME->isImplicitCXXThis() || 11403 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11404 "Expected member of current class."); 11405 VD = 
ME->getMemberDecl(); 11406 } 11407 DS.insert(VD); 11408 } 11409 } 11410 } 11411 11412 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11413 if (!NeedToPush) 11414 return; 11415 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11416 } 11417 11418 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11419 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11420 11421 return llvm::any_of( 11422 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11423 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11424 } 11425 11426 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11427 const OMPExecutableDirective &S, 11428 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11429 const { 11430 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11431 // Vars in target/task regions must be excluded completely. 11432 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11433 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11434 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11435 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11436 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11437 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11438 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11439 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11440 } 11441 } 11442 // Exclude vars in private clauses. 
11443 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11444 for (const Expr *Ref : C->varlists()) { 11445 if (!Ref->getType()->isScalarType()) 11446 continue; 11447 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11448 if (!DRE) 11449 continue; 11450 NeedToCheckForLPCs.insert(DRE->getDecl()); 11451 } 11452 } 11453 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11454 for (const Expr *Ref : C->varlists()) { 11455 if (!Ref->getType()->isScalarType()) 11456 continue; 11457 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11458 if (!DRE) 11459 continue; 11460 NeedToCheckForLPCs.insert(DRE->getDecl()); 11461 } 11462 } 11463 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11464 for (const Expr *Ref : C->varlists()) { 11465 if (!Ref->getType()->isScalarType()) 11466 continue; 11467 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11468 if (!DRE) 11469 continue; 11470 NeedToCheckForLPCs.insert(DRE->getDecl()); 11471 } 11472 } 11473 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11474 for (const Expr *Ref : C->varlists()) { 11475 if (!Ref->getType()->isScalarType()) 11476 continue; 11477 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11478 if (!DRE) 11479 continue; 11480 NeedToCheckForLPCs.insert(DRE->getDecl()); 11481 } 11482 } 11483 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11484 for (const Expr *Ref : C->varlists()) { 11485 if (!Ref->getType()->isScalarType()) 11486 continue; 11487 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11488 if (!DRE) 11489 continue; 11490 NeedToCheckForLPCs.insert(DRE->getDecl()); 11491 } 11492 } 11493 for (const Decl *VD : NeedToCheckForLPCs) { 11494 for (const LastprivateConditionalData &Data : 11495 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11496 if (Data.DeclToUniqueName.count(VD) > 0) { 11497 if (!Data.Disabled) 11498 
NeedToAddForLPCsAsDisabled.insert(VD); 11499 break; 11500 } 11501 } 11502 } 11503 } 11504 11505 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11506 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11507 : CGM(CGF.CGM), 11508 Action((CGM.getLangOpts().OpenMP >= 50 && 11509 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11510 [](const OMPLastprivateClause *C) { 11511 return C->getKind() == 11512 OMPC_LASTPRIVATE_conditional; 11513 })) 11514 ? ActionToDo::PushAsLastprivateConditional 11515 : ActionToDo::DoNotPush) { 11516 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11517 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11518 return; 11519 assert(Action == ActionToDo::PushAsLastprivateConditional && 11520 "Expected a push action."); 11521 LastprivateConditionalData &Data = 11522 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11523 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11524 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11525 continue; 11526 11527 for (const Expr *Ref : C->varlists()) { 11528 Data.DeclToUniqueName.insert(std::make_pair( 11529 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11530 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11531 } 11532 } 11533 Data.IVLVal = IVLVal; 11534 Data.Fn = CGF.CurFn; 11535 } 11536 11537 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11538 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11539 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11540 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11541 if (CGM.getLangOpts().OpenMP < 50) 11542 return; 11543 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11544 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11545 if (!NeedToAddForLPCsAsDisabled.empty()) { 11546 Action = ActionToDo::DisableLastprivateConditional; 11547 
LastprivateConditionalData &Data = 11548 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11549 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11550 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11551 Data.Fn = CGF.CurFn; 11552 Data.Disabled = true; 11553 } 11554 } 11555 11556 CGOpenMPRuntime::LastprivateConditionalRAII 11557 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11558 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11559 return LastprivateConditionalRAII(CGF, S); 11560 } 11561 11562 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11563 if (CGM.getLangOpts().OpenMP < 50) 11564 return; 11565 if (Action == ActionToDo::DisableLastprivateConditional) { 11566 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11567 "Expected list of disabled private vars."); 11568 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11569 } 11570 if (Action == ActionToDo::PushAsLastprivateConditional) { 11571 assert( 11572 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11573 "Expected list of lastprivate conditional vars."); 11574 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11575 } 11576 } 11577 11578 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11579 const VarDecl *VD) { 11580 ASTContext &C = CGM.getContext(); 11581 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11582 if (I == LastprivateConditionalToTypes.end()) 11583 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11584 QualType NewType; 11585 const FieldDecl *VDField; 11586 const FieldDecl *FiredField; 11587 LValue BaseLVal; 11588 auto VI = I->getSecond().find(VD); 11589 if (VI == I->getSecond().end()) { 11590 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 11591 RD->startDefinition(); 11592 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11593 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 11594 RD->completeDefinition(); 11595 NewType = C.getRecordType(RD); 11596 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11597 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11598 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11599 } else { 11600 NewType = std::get<0>(VI->getSecond()); 11601 VDField = std::get<1>(VI->getSecond()); 11602 FiredField = std::get<2>(VI->getSecond()); 11603 BaseLVal = std::get<3>(VI->getSecond()); 11604 } 11605 LValue FiredLVal = 11606 CGF.EmitLValueForField(BaseLVal, FiredField); 11607 CGF.EmitStoreOfScalar( 11608 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11609 FiredLVal); 11610 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11611 } 11612 11613 namespace { 11614 /// Checks if the lastprivate conditional variable is referenced in LHS. 11615 class LastprivateConditionalRefChecker final 11616 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11617 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11618 const Expr *FoundE = nullptr; 11619 const Decl *FoundD = nullptr; 11620 StringRef UniqueDeclName; 11621 LValue IVLVal; 11622 llvm::Function *FoundFn = nullptr; 11623 SourceLocation Loc; 11624 11625 public: 11626 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11627 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11628 llvm::reverse(LPM)) { 11629 auto It = D.DeclToUniqueName.find(E->getDecl()); 11630 if (It == D.DeclToUniqueName.end()) 11631 continue; 11632 if (D.Disabled) 11633 return false; 11634 FoundE = E; 11635 FoundD = E->getDecl()->getCanonicalDecl(); 11636 UniqueDeclName = It->second; 11637 IVLVal = D.IVLVal; 11638 FoundFn = D.Fn; 11639 break; 11640 } 11641 return FoundE == E; 11642 } 11643 bool VisitMemberExpr(const MemberExpr *E) { 11644 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11645 return false; 11646 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 11647 llvm::reverse(LPM)) { 11648 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11649 if (It == D.DeclToUniqueName.end()) 11650 continue; 11651 if (D.Disabled) 11652 return false; 11653 FoundE = E; 11654 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11655 UniqueDeclName = It->second; 11656 IVLVal = D.IVLVal; 11657 FoundFn = D.Fn; 11658 break; 11659 } 11660 return FoundE == E; 11661 } 11662 bool VisitStmt(const Stmt *S) { 11663 for (const Stmt *Child : S->children()) { 11664 if (!Child) 11665 continue; 11666 if (const auto *E = dyn_cast<Expr>(Child)) 11667 if (!E->isGLValue()) 11668 continue; 11669 if (Visit(Child)) 11670 return true; 11671 } 11672 return false; 11673 } 11674 explicit LastprivateConditionalRefChecker( 11675 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11676 : LPM(LPM) {} 11677 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11678 getFoundData() const { 11679 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11680 } 11681 }; 11682 } // namespace 11683 11684 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11685 LValue IVLVal, 11686 StringRef UniqueDeclName, 11687 LValue LVal, 11688 SourceLocation Loc) { 11689 // Last updated loop counter for the lastprivate conditional var. 11690 // int<xx> last_iv = 0; 11691 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11692 llvm::Constant *LastIV = 11693 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 11694 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11695 IVLVal.getAlignment().getAsAlign()); 11696 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11697 11698 // Last value of the lastprivate conditional. 
11699 // decltype(priv_a) last_a; 11700 llvm::Constant *Last = getOrCreateInternalVariable( 11701 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 11702 cast<llvm::GlobalVariable>(Last)->setAlignment( 11703 LVal.getAlignment().getAsAlign()); 11704 LValue LastLVal = 11705 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 11706 11707 // Global loop counter. Required to handle inner parallel-for regions. 11708 // iv 11709 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 11710 11711 // #pragma omp critical(a) 11712 // if (last_iv <= iv) { 11713 // last_iv = iv; 11714 // last_a = priv_a; 11715 // } 11716 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 11717 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 11718 Action.Enter(CGF); 11719 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 11720 // (last_iv <= iv) ? Check if the variable is updated and store new 11721 // value in global var. 11722 llvm::Value *CmpRes; 11723 if (IVLVal.getType()->isSignedIntegerType()) { 11724 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 11725 } else { 11726 assert(IVLVal.getType()->isUnsignedIntegerType() && 11727 "Loop iteration variable must be integer."); 11728 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 11729 } 11730 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 11731 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 11732 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 11733 // { 11734 CGF.EmitBlock(ThenBB); 11735 11736 // last_iv = iv; 11737 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 11738 11739 // last_a = priv_a; 11740 switch (CGF.getEvaluationKind(LVal.getType())) { 11741 case TEK_Scalar: { 11742 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 11743 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 11744 break; 11745 } 11746 case TEK_Complex: { 11747 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 11748 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 11749 break; 11750 } 11751 case TEK_Aggregate: 11752 llvm_unreachable( 11753 "Aggregates are not supported in lastprivate conditional."); 11754 } 11755 // } 11756 CGF.EmitBranch(ExitBB); 11757 // There is no need to emit line number for unconditional branch. 11758 (void)ApplyDebugLocation::CreateEmpty(CGF); 11759 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 11760 }; 11761 11762 if (CGM.getLangOpts().OpenMPSimd) { 11763 // Do not emit as a critical region as no parallel region could be emitted. 11764 RegionCodeGenTy ThenRCG(CodeGen); 11765 ThenRCG(CGF); 11766 } else { 11767 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 11768 } 11769 } 11770 11771 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 11772 const Expr *LHS) { 11773 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11774 return; 11775 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 11776 if (!Checker.Visit(LHS)) 11777 return; 11778 const Expr *FoundE; 11779 const Decl *FoundD; 11780 StringRef UniqueDeclName; 11781 LValue IVLVal; 11782 llvm::Function *FoundFn; 11783 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 11784 Checker.getFoundData(); 11785 if (FoundFn != CGF.CurFn) { 11786 // Special codegen for inner parallel regions. 
11787 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 11788 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 11789 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 11790 "Lastprivate conditional is not found in outer region."); 11791 QualType StructTy = std::get<0>(It->getSecond()); 11792 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 11793 LValue PrivLVal = CGF.EmitLValue(FoundE); 11794 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11795 PrivLVal.getAddress(CGF), 11796 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 11797 LValue BaseLVal = 11798 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 11799 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 11800 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 11801 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 11802 FiredLVal, llvm::AtomicOrdering::Unordered, 11803 /*IsVolatile=*/true, /*isInit=*/false); 11804 return; 11805 } 11806 11807 // Private address of the lastprivate conditional in the current context. 
11808 // priv_a 11809 LValue LVal = CGF.EmitLValue(FoundE); 11810 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 11811 FoundE->getExprLoc()); 11812 } 11813 11814 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 11815 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11816 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 11817 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11818 return; 11819 auto Range = llvm::reverse(LastprivateConditionalStack); 11820 auto It = llvm::find_if( 11821 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 11822 if (It == Range.end() || It->Fn != CGF.CurFn) 11823 return; 11824 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 11825 assert(LPCI != LastprivateConditionalToTypes.end() && 11826 "Lastprivates must be registered already."); 11827 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11828 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 11829 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 11830 for (const auto &Pair : It->DeclToUniqueName) { 11831 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 11832 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 11833 continue; 11834 auto I = LPCI->getSecond().find(Pair.first); 11835 assert(I != LPCI->getSecond().end() && 11836 "Lastprivate must be rehistered already."); 11837 // bool Cmp = priv_a.Fired != 0; 11838 LValue BaseLVal = std::get<3>(I->getSecond()); 11839 LValue FiredLVal = 11840 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 11841 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 11842 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 11843 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 11844 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 11845 // if (Cmp) { 11846 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 11847 CGF.EmitBlock(ThenBB); 
11848 Address Addr = CGF.GetAddrOfLocalVar(VD); 11849 LValue LVal; 11850 if (VD->getType()->isReferenceType()) 11851 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 11852 AlignmentSource::Decl); 11853 else 11854 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 11855 AlignmentSource::Decl); 11856 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 11857 D.getBeginLoc()); 11858 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 11859 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 11860 // } 11861 } 11862 } 11863 11864 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11865 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11866 SourceLocation Loc) { 11867 if (CGF.getLangOpts().OpenMP < 50) 11868 return; 11869 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 11870 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 11871 "Unknown lastprivate conditional variable."); 11872 StringRef UniqueName = It->second; 11873 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11874 // The variable was not updated in the region - exit. 
11875 if (!GV) 11876 return; 11877 LValue LPLVal = CGF.MakeAddrLValue( 11878 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 11879 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 11880 CGF.EmitStoreOfScalar(Res, PrivLVal); 11881 } 11882 11883 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11884 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11885 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11886 llvm_unreachable("Not supported in SIMD-only mode"); 11887 } 11888 11889 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11890 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11891 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11892 llvm_unreachable("Not supported in SIMD-only mode"); 11893 } 11894 11895 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11896 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11897 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11898 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11899 bool Tied, unsigned &NumberOfParts) { 11900 llvm_unreachable("Not supported in SIMD-only mode"); 11901 } 11902 11903 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11904 SourceLocation Loc, 11905 llvm::Function *OutlinedFn, 11906 ArrayRef<llvm::Value *> CapturedVars, 11907 const Expr *IfCond) { 11908 llvm_unreachable("Not supported in SIMD-only mode"); 11909 } 11910 11911 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11912 CodeGenFunction &CGF, StringRef CriticalName, 11913 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11914 const Expr *Hint) { 11915 llvm_unreachable("Not supported in SIMD-only mode"); 11916 } 11917 11918 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11919 const RegionCodeGenTy &MasterOpGen, 11920 SourceLocation Loc) { 11921 llvm_unreachable("Not supported in SIMD-only mode"); 11922 } 11923 11924 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11925 SourceLocation Loc) { 11926 llvm_unreachable("Not supported in SIMD-only mode"); 11927 } 11928 11929 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11930 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11931 SourceLocation Loc) { 11932 llvm_unreachable("Not supported in SIMD-only mode"); 11933 } 11934 11935 void CGOpenMPSIMDRuntime::emitSingleRegion( 11936 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11937 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11938 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11939 ArrayRef<const Expr *> AssignmentOps) { 11940 llvm_unreachable("Not supported in SIMD-only mode"); 11941 } 11942 11943 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11944 const RegionCodeGenTy &OrderedOpGen, 11945 SourceLocation Loc, 11946 bool IsThreads) { 11947 llvm_unreachable("Not supported in SIMD-only mode"); 11948 } 11949 11950 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11951 SourceLocation Loc, 11952 OpenMPDirectiveKind Kind, 11953 bool EmitChecks, 11954 bool ForceSimpleCall) { 11955 llvm_unreachable("Not supported in SIMD-only mode"); 11956 } 11957 11958 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11959 CodeGenFunction &CGF, SourceLocation Loc, 11960 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11961 bool Ordered, const DispatchRTInput &DispatchValues) { 11962 llvm_unreachable("Not supported in SIMD-only mode"); 11963 } 11964 11965 void CGOpenMPSIMDRuntime::emitForStaticInit( 11966 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11967 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11968 llvm_unreachable("Not supported in SIMD-only mode"); 11969 } 11970 11971 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11972 CodeGenFunction &CGF, SourceLocation Loc, 11973 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 11974 llvm_unreachable("Not supported in SIMD-only mode"); 11975 } 11976 11977 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11978 SourceLocation Loc, 11979 unsigned IVSize, 11980 bool IVSigned) { 11981 llvm_unreachable("Not supported in SIMD-only mode"); 11982 } 11983 11984 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11985 SourceLocation Loc, 11986 OpenMPDirectiveKind DKind) { 11987 llvm_unreachable("Not supported in SIMD-only mode"); 11988 } 11989 11990 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11991 SourceLocation Loc, 11992 unsigned IVSize, bool IVSigned, 11993 Address IL, Address LB, 11994 Address UB, Address ST) { 11995 llvm_unreachable("Not supported in SIMD-only mode"); 11996 } 11997 11998 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11999 llvm::Value *NumThreads, 12000 SourceLocation Loc) { 12001 llvm_unreachable("Not supported in SIMD-only mode"); 12002 } 12003 12004 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12005 ProcBindKind ProcBind, 12006 SourceLocation Loc) { 12007 llvm_unreachable("Not supported in SIMD-only mode"); 12008 } 12009 12010 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12011 const VarDecl *VD, 12012 Address VDAddr, 12013 SourceLocation Loc) { 12014 llvm_unreachable("Not supported in SIMD-only mode"); 12015 } 12016 12017 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12018 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12019 CodeGenFunction *CGF) { 12020 llvm_unreachable("Not supported in SIMD-only mode"); 12021 } 12022 12023 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12024 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12025 llvm_unreachable("Not supported in SIMD-only mode"); 12026 } 12027 12028 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12029 ArrayRef<const Expr *> 
Vars, 12030 SourceLocation Loc, 12031 llvm::AtomicOrdering AO) { 12032 llvm_unreachable("Not supported in SIMD-only mode"); 12033 } 12034 12035 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12036 const OMPExecutableDirective &D, 12037 llvm::Function *TaskFunction, 12038 QualType SharedsTy, Address Shareds, 12039 const Expr *IfCond, 12040 const OMPTaskDataTy &Data) { 12041 llvm_unreachable("Not supported in SIMD-only mode"); 12042 } 12043 12044 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12045 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12046 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12047 const Expr *IfCond, const OMPTaskDataTy &Data) { 12048 llvm_unreachable("Not supported in SIMD-only mode"); 12049 } 12050 12051 void CGOpenMPSIMDRuntime::emitReduction( 12052 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12053 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12054 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12055 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12056 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12057 ReductionOps, Options); 12058 } 12059 12060 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12061 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12062 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12063 llvm_unreachable("Not supported in SIMD-only mode"); 12064 } 12065 12066 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12067 SourceLocation Loc, 12068 ReductionCodeGen &RCG, 12069 unsigned N) { 12070 llvm_unreachable("Not supported in SIMD-only mode"); 12071 } 12072 12073 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12074 SourceLocation Loc, 12075 llvm::Value *ReductionsPtr, 12076 LValue SharedLVal) { 12077 llvm_unreachable("Not supported in 
SIMD-only mode"); 12078 } 12079 12080 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12081 SourceLocation Loc) { 12082 llvm_unreachable("Not supported in SIMD-only mode"); 12083 } 12084 12085 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12086 CodeGenFunction &CGF, SourceLocation Loc, 12087 OpenMPDirectiveKind CancelRegion) { 12088 llvm_unreachable("Not supported in SIMD-only mode"); 12089 } 12090 12091 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12092 SourceLocation Loc, const Expr *IfCond, 12093 OpenMPDirectiveKind CancelRegion) { 12094 llvm_unreachable("Not supported in SIMD-only mode"); 12095 } 12096 12097 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12098 const OMPExecutableDirective &D, StringRef ParentName, 12099 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12100 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12101 llvm_unreachable("Not supported in SIMD-only mode"); 12102 } 12103 12104 void CGOpenMPSIMDRuntime::emitTargetCall( 12105 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12106 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12107 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, 12108 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12109 const OMPLoopDirective &D)> 12110 SizeEmitter) { 12111 llvm_unreachable("Not supported in SIMD-only mode"); 12112 } 12113 12114 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12115 llvm_unreachable("Not supported in SIMD-only mode"); 12116 } 12117 12118 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12119 llvm_unreachable("Not supported in SIMD-only mode"); 12120 } 12121 12122 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12123 return false; 12124 } 12125 12126 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12127 const OMPExecutableDirective &D, 12128 SourceLocation Loc, 12129 llvm::Function *OutlinedFn, 12130 
ArrayRef<llvm::Value *> CapturedVars) { 12131 llvm_unreachable("Not supported in SIMD-only mode"); 12132 } 12133 12134 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12135 const Expr *NumTeams, 12136 const Expr *ThreadLimit, 12137 SourceLocation Loc) { 12138 llvm_unreachable("Not supported in SIMD-only mode"); 12139 } 12140 12141 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12142 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12143 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12144 llvm_unreachable("Not supported in SIMD-only mode"); 12145 } 12146 12147 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12148 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12149 const Expr *Device) { 12150 llvm_unreachable("Not supported in SIMD-only mode"); 12151 } 12152 12153 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12154 const OMPLoopDirective &D, 12155 ArrayRef<Expr *> NumIterations) { 12156 llvm_unreachable("Not supported in SIMD-only mode"); 12157 } 12158 12159 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12160 const OMPDependClause *C) { 12161 llvm_unreachable("Not supported in SIMD-only mode"); 12162 } 12163 12164 const VarDecl * 12165 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12166 const VarDecl *NativeParam) const { 12167 llvm_unreachable("Not supported in SIMD-only mode"); 12168 } 12169 12170 Address 12171 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12172 const VarDecl *NativeParam, 12173 const VarDecl *TargetParam) const { 12174 llvm_unreachable("Not supported in SIMD-only mode"); 12175 } 12176