1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/Basic/OpenMPKinds.h" 25 #include "clang/Basic/SourceManager.h" 26 #include "clang/CodeGen/ConstantInitBuilder.h" 27 #include "llvm/ADT/ArrayRef.h" 28 #include "llvm/ADT/SetOperations.h" 29 #include "llvm/ADT/StringExtras.h" 30 #include "llvm/Bitcode/BitcodeReader.h" 31 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 32 #include "llvm/IR/Constants.h" 33 #include "llvm/IR/DerivedTypes.h" 34 #include "llvm/IR/GlobalValue.h" 35 #include "llvm/IR/Value.h" 36 #include "llvm/Support/AtomicOrdering.h" 37 #include "llvm/Support/Format.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cassert> 40 41 using namespace clang; 42 using namespace CodeGen; 43 using namespace llvm::omp; 44 45 namespace { 46 /// Base class for handling code generation inside OpenMP regions. 47 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 48 public: 49 /// Kinds of OpenMP regions used in codegen. 50 enum CGOpenMPRegionKind { 51 /// Region with outlined function for standalone 'parallel' 52 /// directive. 
53 ParallelOutlinedRegion, 54 /// Region with outlined function for standalone 'task' directive. 55 TaskOutlinedRegion, 56 /// Region for constructs that do not require function outlining, 57 /// like 'for', 'sections', 'atomic' etc. directives. 58 InlinedRegion, 59 /// Region with outlined function for standalone 'target' directive. 60 TargetRegion, 61 }; 62 63 CGOpenMPRegionInfo(const CapturedStmt &CS, 64 const CGOpenMPRegionKind RegionKind, 65 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 66 bool HasCancel) 67 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 68 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 69 70 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 71 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 72 bool HasCancel) 73 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 74 Kind(Kind), HasCancel(HasCancel) {} 75 76 /// Get a variable or parameter for storing global thread id 77 /// inside OpenMP construct. 78 virtual const VarDecl *getThreadIDVariable() const = 0; 79 80 /// Emit the captured statement body. 81 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 82 83 /// Get an LValue for the current ThreadID variable. 84 /// \return LValue for thread id variable. This LValue always has type int32*. 
85 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 86 87 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 88 89 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 90 91 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 92 93 bool hasCancel() const { return HasCancel; } 94 95 static bool classof(const CGCapturedStmtInfo *Info) { 96 return Info->getKind() == CR_OpenMP; 97 } 98 99 ~CGOpenMPRegionInfo() override = default; 100 101 protected: 102 CGOpenMPRegionKind RegionKind; 103 RegionCodeGenTy CodeGen; 104 OpenMPDirectiveKind Kind; 105 bool HasCancel; 106 }; 107 108 /// API for captured statement code generation in OpenMP constructs. 109 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 110 public: 111 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 112 const RegionCodeGenTy &CodeGen, 113 OpenMPDirectiveKind Kind, bool HasCancel, 114 StringRef HelperName) 115 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 116 HasCancel), 117 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 118 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 119 } 120 121 /// Get a variable or parameter for storing global thread id 122 /// inside OpenMP construct. 123 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 124 125 /// Get the name of the capture helper. 126 StringRef getHelperName() const override { return HelperName; } 127 128 static bool classof(const CGCapturedStmtInfo *Info) { 129 return CGOpenMPRegionInfo::classof(Info) && 130 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 131 ParallelOutlinedRegion; 132 } 133 134 private: 135 /// A variable or parameter storing global thread id for OpenMP 136 /// constructs. 137 const VarDecl *ThreadIDVar; 138 StringRef HelperName; 139 }; 140 141 /// API for captured statement code generation in OpenMP constructs. 
142 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 143 public: 144 class UntiedTaskActionTy final : public PrePostActionTy { 145 bool Untied; 146 const VarDecl *PartIDVar; 147 const RegionCodeGenTy UntiedCodeGen; 148 llvm::SwitchInst *UntiedSwitch = nullptr; 149 150 public: 151 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 152 const RegionCodeGenTy &UntiedCodeGen) 153 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 154 void Enter(CodeGenFunction &CGF) override { 155 if (Untied) { 156 // Emit task switching point. 157 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 158 CGF.GetAddrOfLocalVar(PartIDVar), 159 PartIDVar->getType()->castAs<PointerType>()); 160 llvm::Value *Res = 161 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 162 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 163 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 164 CGF.EmitBlock(DoneBB); 165 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 166 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 167 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 168 CGF.Builder.GetInsertBlock()); 169 emitUntiedSwitch(CGF); 170 } 171 } 172 void emitUntiedSwitch(CodeGenFunction &CGF) const { 173 if (Untied) { 174 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 175 CGF.GetAddrOfLocalVar(PartIDVar), 176 PartIDVar->getType()->castAs<PointerType>()); 177 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 178 PartIdLVal); 179 UntiedCodeGen(CGF); 180 CodeGenFunction::JumpDest CurPoint = 181 CGF.getJumpDestInCurrentScope(".untied.next."); 182 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 183 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 184 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 185 CGF.Builder.GetInsertBlock()); 186 CGF.EmitBranchThroughCleanup(CurPoint); 187 CGF.EmitBlock(CurPoint.getBlock()); 188 } 189 } 190 unsigned getNumberOfParts() const { return 
UntiedSwitch->getNumCases(); } 191 }; 192 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 193 const VarDecl *ThreadIDVar, 194 const RegionCodeGenTy &CodeGen, 195 OpenMPDirectiveKind Kind, bool HasCancel, 196 const UntiedTaskActionTy &Action) 197 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 198 ThreadIDVar(ThreadIDVar), Action(Action) { 199 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 200 } 201 202 /// Get a variable or parameter for storing global thread id 203 /// inside OpenMP construct. 204 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 205 206 /// Get an LValue for the current ThreadID variable. 207 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 208 209 /// Get the name of the capture helper. 210 StringRef getHelperName() const override { return ".omp_outlined."; } 211 212 void emitUntiedSwitch(CodeGenFunction &CGF) override { 213 Action.emitUntiedSwitch(CGF); 214 } 215 216 static bool classof(const CGCapturedStmtInfo *Info) { 217 return CGOpenMPRegionInfo::classof(Info) && 218 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 219 TaskOutlinedRegion; 220 } 221 222 private: 223 /// A variable or parameter storing global thread id for OpenMP 224 /// constructs. 225 const VarDecl *ThreadIDVar; 226 /// Action for emitting code for untied tasks. 227 const UntiedTaskActionTy &Action; 228 }; 229 230 /// API for inlined captured statement code generation in OpenMP 231 /// constructs. 232 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 233 public: 234 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 235 const RegionCodeGenTy &CodeGen, 236 OpenMPDirectiveKind Kind, bool HasCancel) 237 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 238 OldCSI(OldCSI), 239 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 240 241 // Retrieve the value of the context parameter. 
242 llvm::Value *getContextValue() const override { 243 if (OuterRegionInfo) 244 return OuterRegionInfo->getContextValue(); 245 llvm_unreachable("No context value for inlined OpenMP region"); 246 } 247 248 void setContextValue(llvm::Value *V) override { 249 if (OuterRegionInfo) { 250 OuterRegionInfo->setContextValue(V); 251 return; 252 } 253 llvm_unreachable("No context value for inlined OpenMP region"); 254 } 255 256 /// Lookup the captured field decl for a variable. 257 const FieldDecl *lookup(const VarDecl *VD) const override { 258 if (OuterRegionInfo) 259 return OuterRegionInfo->lookup(VD); 260 // If there is no outer outlined region,no need to lookup in a list of 261 // captured variables, we can use the original one. 262 return nullptr; 263 } 264 265 FieldDecl *getThisFieldDecl() const override { 266 if (OuterRegionInfo) 267 return OuterRegionInfo->getThisFieldDecl(); 268 return nullptr; 269 } 270 271 /// Get a variable or parameter for storing global thread id 272 /// inside OpenMP construct. 273 const VarDecl *getThreadIDVariable() const override { 274 if (OuterRegionInfo) 275 return OuterRegionInfo->getThreadIDVariable(); 276 return nullptr; 277 } 278 279 /// Get an LValue for the current ThreadID variable. 280 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 281 if (OuterRegionInfo) 282 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 283 llvm_unreachable("No LValue for inlined OpenMP construct"); 284 } 285 286 /// Get the name of the capture helper. 
287 StringRef getHelperName() const override { 288 if (auto *OuterRegionInfo = getOldCSI()) 289 return OuterRegionInfo->getHelperName(); 290 llvm_unreachable("No helper name for inlined OpenMP construct"); 291 } 292 293 void emitUntiedSwitch(CodeGenFunction &CGF) override { 294 if (OuterRegionInfo) 295 OuterRegionInfo->emitUntiedSwitch(CGF); 296 } 297 298 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 299 300 static bool classof(const CGCapturedStmtInfo *Info) { 301 return CGOpenMPRegionInfo::classof(Info) && 302 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 303 } 304 305 ~CGOpenMPInlinedRegionInfo() override = default; 306 307 private: 308 /// CodeGen info about outer OpenMP region. 309 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 310 CGOpenMPRegionInfo *OuterRegionInfo; 311 }; 312 313 /// API for captured statement code generation in OpenMP target 314 /// constructs. For this captures, implicit parameters are used instead of the 315 /// captured fields. The name of the target region has to be unique in a given 316 /// application so it is provided by the client, because only the client has 317 /// the information to generate that. 318 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 319 public: 320 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 321 const RegionCodeGenTy &CodeGen, StringRef HelperName) 322 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 323 /*HasCancel=*/false), 324 HelperName(HelperName) {} 325 326 /// This is unused for target regions because each starts executing 327 /// with a single thread. 328 const VarDecl *getThreadIDVariable() const override { return nullptr; } 329 330 /// Get the name of the capture helper. 
331 StringRef getHelperName() const override { return HelperName; } 332 333 static bool classof(const CGCapturedStmtInfo *Info) { 334 return CGOpenMPRegionInfo::classof(Info) && 335 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 336 } 337 338 private: 339 StringRef HelperName; 340 }; 341 342 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 343 llvm_unreachable("No codegen for expressions"); 344 } 345 /// API for generation of expressions captured in a innermost OpenMP 346 /// region. 347 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 348 public: 349 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 350 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 351 OMPD_unknown, 352 /*HasCancel=*/false), 353 PrivScope(CGF) { 354 // Make sure the globals captured in the provided statement are local by 355 // using the privatization logic. We assume the same variable is not 356 // captured more than once. 357 for (const auto &C : CS.captures()) { 358 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 359 continue; 360 361 const VarDecl *VD = C.getCapturedVar(); 362 if (VD->isLocalVarDeclOrParm()) 363 continue; 364 365 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 366 /*RefersToEnclosingVariableOrCapture=*/false, 367 VD->getType().getNonReferenceType(), VK_LValue, 368 C.getLocation()); 369 PrivScope.addPrivate( 370 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 371 } 372 (void)PrivScope.Privatize(); 373 } 374 375 /// Lookup the captured field decl for a variable. 376 const FieldDecl *lookup(const VarDecl *VD) const override { 377 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 378 return FD; 379 return nullptr; 380 } 381 382 /// Emit the captured statement body. 
383 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 384 llvm_unreachable("No body for expressions"); 385 } 386 387 /// Get a variable or parameter for storing global thread id 388 /// inside OpenMP construct. 389 const VarDecl *getThreadIDVariable() const override { 390 llvm_unreachable("No thread id for expressions"); 391 } 392 393 /// Get the name of the capture helper. 394 StringRef getHelperName() const override { 395 llvm_unreachable("No helper name for expressions"); 396 } 397 398 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 399 400 private: 401 /// Private scope to capture global variables. 402 CodeGenFunction::OMPPrivateScope PrivScope; 403 }; 404 405 /// RAII for emitting code of OpenMP constructs. 406 class InlinedOpenMPRegionRAII { 407 CodeGenFunction &CGF; 408 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 409 FieldDecl *LambdaThisCaptureField = nullptr; 410 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 411 412 public: 413 /// Constructs region for combined constructs. 414 /// \param CodeGen Code generation sequence for combined directives. Includes 415 /// a list of functions used for code generation of implicitly inlined 416 /// regions. 417 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 418 OpenMPDirectiveKind Kind, bool HasCancel) 419 : CGF(CGF) { 420 // Start emission for the construct. 421 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 422 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 423 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 424 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 425 CGF.LambdaThisCaptureField = nullptr; 426 BlockInfo = CGF.BlockInfo; 427 CGF.BlockInfo = nullptr; 428 } 429 430 ~InlinedOpenMPRegionRAII() { 431 // Restore original CapturedStmtInfo only if we're done with code emission. 
432 auto *OldCSI = 433 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 434 delete CGF.CapturedStmtInfo; 435 CGF.CapturedStmtInfo = OldCSI; 436 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 437 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 438 CGF.BlockInfo = BlockInfo; 439 } 440 }; 441 442 /// Values for bit flags used in the ident_t to describe the fields. 443 /// All enumeric elements are named and described in accordance with the code 444 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 445 enum OpenMPLocationFlags : unsigned { 446 /// Use trampoline for internal microtask. 447 OMP_IDENT_IMD = 0x01, 448 /// Use c-style ident structure. 449 OMP_IDENT_KMPC = 0x02, 450 /// Atomic reduction option for kmpc_reduce. 451 OMP_ATOMIC_REDUCE = 0x10, 452 /// Explicit 'barrier' directive. 453 OMP_IDENT_BARRIER_EXPL = 0x20, 454 /// Implicit barrier in code. 455 OMP_IDENT_BARRIER_IMPL = 0x40, 456 /// Implicit barrier in 'for' directive. 457 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 458 /// Implicit barrier in 'sections' directive. 459 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 460 /// Implicit barrier in 'single' directive. 461 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 462 /// Call of __kmp_for_static_init for static loop. 463 OMP_IDENT_WORK_LOOP = 0x200, 464 /// Call of __kmp_for_static_init for sections. 465 OMP_IDENT_WORK_SECTIONS = 0x400, 466 /// Call of __kmp_for_static_init for distribute. 467 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 468 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 469 }; 470 471 namespace { 472 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 473 /// Values for bit flags for marking which requires clauses have been used. 474 enum OpenMPOffloadingRequiresDirFlags : int64_t { 475 /// flag undefined. 476 OMP_REQ_UNDEFINED = 0x000, 477 /// no requires clause present. 478 OMP_REQ_NONE = 0x001, 479 /// reverse_offload clause. 
480 OMP_REQ_REVERSE_OFFLOAD = 0x002, 481 /// unified_address clause. 482 OMP_REQ_UNIFIED_ADDRESS = 0x004, 483 /// unified_shared_memory clause. 484 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 485 /// dynamic_allocators clause. 486 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 487 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 488 }; 489 490 enum OpenMPOffloadingReservedDeviceIDs { 491 /// Device ID if the device was not defined, runtime should get it 492 /// from environment variables in the spec. 493 OMP_DEVICEID_UNDEF = -1, 494 }; 495 } // anonymous namespace 496 497 /// Describes ident structure that describes a source location. 498 /// All descriptions are taken from 499 /// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 500 /// Original structure: 501 /// typedef struct ident { 502 /// kmp_int32 reserved_1; /**< might be used in Fortran; 503 /// see above */ 504 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; 505 /// KMP_IDENT_KMPC identifies this union 506 /// member */ 507 /// kmp_int32 reserved_2; /**< not really used in Fortran any more; 508 /// see above */ 509 ///#if USE_ITT_BUILD 510 /// /* but currently used for storing 511 /// region-specific ITT */ 512 /// /* contextual information. */ 513 ///#endif /* USE_ITT_BUILD */ 514 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for 515 /// C++ */ 516 /// char const *psource; /**< String describing the source location. 517 /// The string is composed of semi-colon separated 518 // fields which describe the source file, 519 /// the function and a pair of line numbers that 520 /// delimit the construct. 521 /// */ 522 /// } ident_t; 523 enum IdentFieldIndex { 524 /// might be used in Fortran 525 IdentField_Reserved_1, 526 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. 
527 IdentField_Flags, 528 /// Not really used in Fortran any more 529 IdentField_Reserved_2, 530 /// Source[4] in Fortran, do not use for C++ 531 IdentField_Reserved_3, 532 /// String describing the source location. The string is composed of 533 /// semi-colon separated fields which describe the source file, the function 534 /// and a pair of line numbers that delimit the construct. 535 IdentField_PSource 536 }; 537 538 /// Schedule types for 'omp for' loops (these enumerators are taken from 539 /// the enum sched_type in kmp.h). 540 enum OpenMPSchedType { 541 /// Lower bound for default (unordered) versions. 542 OMP_sch_lower = 32, 543 OMP_sch_static_chunked = 33, 544 OMP_sch_static = 34, 545 OMP_sch_dynamic_chunked = 35, 546 OMP_sch_guided_chunked = 36, 547 OMP_sch_runtime = 37, 548 OMP_sch_auto = 38, 549 /// static with chunk adjustment (e.g., simd) 550 OMP_sch_static_balanced_chunked = 45, 551 /// Lower bound for 'ordered' versions. 552 OMP_ord_lower = 64, 553 OMP_ord_static_chunked = 65, 554 OMP_ord_static = 66, 555 OMP_ord_dynamic_chunked = 67, 556 OMP_ord_guided_chunked = 68, 557 OMP_ord_runtime = 69, 558 OMP_ord_auto = 70, 559 OMP_sch_default = OMP_sch_static, 560 /// dist_schedule types 561 OMP_dist_sch_static_chunked = 91, 562 OMP_dist_sch_static = 92, 563 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. 564 /// Set if the monotonic schedule modifier was present. 565 OMP_sch_modifier_monotonic = (1 << 29), 566 /// Set if the nonmonotonic schedule modifier was present. 
567 OMP_sch_modifier_nonmonotonic = (1 << 30), 568 }; 569 570 enum OpenMPRTLFunction { 571 /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, 572 /// kmpc_micro microtask, ...); 573 OMPRTL__kmpc_fork_call, 574 /// Call to void *__kmpc_threadprivate_cached(ident_t *loc, 575 /// kmp_int32 global_tid, void *data, size_t size, void ***cache); 576 OMPRTL__kmpc_threadprivate_cached, 577 /// Call to void __kmpc_threadprivate_register( ident_t *, 578 /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 579 OMPRTL__kmpc_threadprivate_register, 580 // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc); 581 OMPRTL__kmpc_global_thread_num, 582 // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 583 // kmp_critical_name *crit); 584 OMPRTL__kmpc_critical, 585 // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 586 // global_tid, kmp_critical_name *crit, uintptr_t hint); 587 OMPRTL__kmpc_critical_with_hint, 588 // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 589 // kmp_critical_name *crit); 590 OMPRTL__kmpc_end_critical, 591 // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 592 // global_tid); 593 OMPRTL__kmpc_cancel_barrier, 594 // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 595 OMPRTL__kmpc_barrier, 596 // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 597 OMPRTL__kmpc_for_static_fini, 598 // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 599 // global_tid); 600 OMPRTL__kmpc_serialized_parallel, 601 // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 602 // global_tid); 603 OMPRTL__kmpc_end_serialized_parallel, 604 // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 605 // kmp_int32 num_threads); 606 OMPRTL__kmpc_push_num_threads, 607 // Call to void __kmpc_flush(ident_t *loc); 608 OMPRTL__kmpc_flush, 609 // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 
global_tid); 610 OMPRTL__kmpc_master, 611 // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid); 612 OMPRTL__kmpc_end_master, 613 // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 614 // int end_part); 615 OMPRTL__kmpc_omp_taskyield, 616 // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); 617 OMPRTL__kmpc_single, 618 // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid); 619 OMPRTL__kmpc_end_single, 620 // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 621 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 622 // kmp_routine_entry_t *task_entry); 623 OMPRTL__kmpc_omp_task_alloc, 624 // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *, 625 // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, 626 // size_t sizeof_shareds, kmp_routine_entry_t *task_entry, 627 // kmp_int64 device_id); 628 OMPRTL__kmpc_omp_target_task_alloc, 629 // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * 630 // new_task); 631 OMPRTL__kmpc_omp_task, 632 // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 633 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 634 // kmp_int32 didit); 635 OMPRTL__kmpc_copyprivate, 636 // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 637 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 638 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 639 OMPRTL__kmpc_reduce, 640 // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 641 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 642 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 643 // *lck); 644 OMPRTL__kmpc_reduce_nowait, 645 // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 646 // kmp_critical_name *lck); 647 OMPRTL__kmpc_end_reduce, 648 // Call to void 
__kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 649 // kmp_critical_name *lck); 650 OMPRTL__kmpc_end_reduce_nowait, 651 // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 652 // kmp_task_t * new_task); 653 OMPRTL__kmpc_omp_task_begin_if0, 654 // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 655 // kmp_task_t * new_task); 656 OMPRTL__kmpc_omp_task_complete_if0, 657 // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 658 OMPRTL__kmpc_ordered, 659 // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 660 OMPRTL__kmpc_end_ordered, 661 // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 662 // global_tid); 663 OMPRTL__kmpc_omp_taskwait, 664 // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid); 665 OMPRTL__kmpc_taskgroup, 666 // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 667 OMPRTL__kmpc_end_taskgroup, 668 // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 669 // int proc_bind); 670 OMPRTL__kmpc_push_proc_bind, 671 // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 672 // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t 673 // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 674 OMPRTL__kmpc_omp_task_with_deps, 675 // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 676 // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 677 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 678 OMPRTL__kmpc_omp_wait_deps, 679 // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 680 // global_tid, kmp_int32 cncl_kind); 681 OMPRTL__kmpc_cancellationpoint, 682 // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 683 // kmp_int32 cncl_kind); 684 OMPRTL__kmpc_cancel, 685 // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, 686 // kmp_int32 num_teams, kmp_int32 thread_limit); 687 
OMPRTL__kmpc_push_num_teams, 688 // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 689 // microtask, ...); 690 OMPRTL__kmpc_fork_teams, 691 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 692 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 693 // sched, kmp_uint64 grainsize, void *task_dup); 694 OMPRTL__kmpc_taskloop, 695 // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 696 // num_dims, struct kmp_dim *dims); 697 OMPRTL__kmpc_doacross_init, 698 // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 699 OMPRTL__kmpc_doacross_fini, 700 // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 701 // *vec); 702 OMPRTL__kmpc_doacross_post, 703 // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 704 // *vec); 705 OMPRTL__kmpc_doacross_wait, 706 // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void 707 // *data); 708 OMPRTL__kmpc_task_reduction_init, 709 // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 710 // *d); 711 OMPRTL__kmpc_task_reduction_get_th_data, 712 // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al); 713 OMPRTL__kmpc_alloc, 714 // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al); 715 OMPRTL__kmpc_free, 716 717 // 718 // Offloading related calls 719 // 720 // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 721 // size); 722 OMPRTL__kmpc_push_target_tripcount, 723 // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 724 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 725 // *arg_types); 726 OMPRTL__tgt_target, 727 // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 728 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 729 // *arg_types); 730 OMPRTL__tgt_target_nowait, 731 // Call to 
int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 732 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 733 // *arg_types, int32_t num_teams, int32_t thread_limit); 734 OMPRTL__tgt_target_teams, 735 // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void 736 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 737 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 738 OMPRTL__tgt_target_teams_nowait, 739 // Call to void __tgt_register_requires(int64_t flags); 740 OMPRTL__tgt_register_requires, 741 // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 742 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 743 OMPRTL__tgt_target_data_begin, 744 // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 745 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 746 // *arg_types); 747 OMPRTL__tgt_target_data_begin_nowait, 748 // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 749 // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types); 750 OMPRTL__tgt_target_data_end, 751 // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t 752 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 753 // *arg_types); 754 OMPRTL__tgt_target_data_end_nowait, 755 // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 756 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 757 OMPRTL__tgt_target_data_update, 758 // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t 759 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 760 // *arg_types); 761 OMPRTL__tgt_target_data_update_nowait, 762 // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 763 OMPRTL__tgt_mapper_num_components, 764 // Call to void __tgt_push_mapper_component(void 
*rt_mapper_handle, void 765 // *base, void *begin, int64_t size, int64_t type); 766 OMPRTL__tgt_push_mapper_component, 767 }; 768 769 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP 770 /// region. 771 class CleanupTy final : public EHScopeStack::Cleanup { 772 PrePostActionTy *Action; 773 774 public: 775 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 776 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 777 if (!CGF.HaveInsertPoint()) 778 return; 779 Action->Exit(CGF); 780 } 781 }; 782 783 } // anonymous namespace 784 785 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 786 CodeGenFunction::RunCleanupsScope Scope(CGF); 787 if (PrePostAction) { 788 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 789 Callback(CodeGen, CGF, *PrePostAction); 790 } else { 791 PrePostActionTy Action; 792 Callback(CodeGen, CGF, Action); 793 } 794 } 795 796 /// Check if the combiner is a call to UDR combiner and if it is so return the 797 /// UDR decl used for reduction. 
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // Recognized shape: a CallExpr whose callee is an OpaqueValueExpr whose
  // source expression (ignoring implicit casts) is a DeclRefExpr that names an
  // OMPDeclareReductionDecl. Any other shape yields nullptr.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit the initialization of the private reduction copy \p Private for a
/// reduction item that uses the 'declare reduction' declaration \p DRD.
///
/// If \p DRD has an initializer, \p InitOp is emitted after mapping the
/// variables referenced by its first and second call arguments to \p Private
/// and \p Original respectively, and after binding its opaque callee to the
/// second function of the pair returned by getUserDefinedReduction().
/// Otherwise \p Private is initialized from a private constant global that
/// holds the null constant of type \p Ty.
///
/// \param InitOp Only inspected when \p DRD has an initializer. It must then
/// be a CallExpr with an OpaqueValueExpr callee whose two arguments are unary
/// operators applied to DeclRefExprs of variables (the casts below assert it).
/// \param Private Address of the private copy being initialized.
/// \param Original Address substituted for the variable referenced by the
/// second call argument.
/// \param Ty Type of the item; used for the null constant, the evaluation
/// kind and the qualifiers of the store in the no-initializer path.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Redirect the two variables named by the call arguments to the private
    // and original storage while the call is emitted.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the emitted function, then emit the call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize a null constant of type Ty in a private
    // constant global and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Form the rvalue according to how values of type Ty are evaluated.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    // Route the rvalue through a temporary opaque value expression so the
    // generic expression-to-memory emission can store it into Private.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit element-by-element initialization of an array.
///
/// The emitted code first branches around the loop when the array is empty and
/// otherwise runs a loop whose exit test is at the bottom.
/// \param CGF Function in which the loop is emitted.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer(); otherwise \p Init is emitted into each
/// element.
/// \param Init Initial expression of array.
/// \param DRD 'declare reduction' declaration, may be null. When non-null the
/// source array at \p SrcAddr is walked in lockstep with the destination.
/// \param SrcAddr Address of the original array. Only used when \p DRD is
/// non-null.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // Loop skeleton: skip the body entirely for an empty array, otherwise
  // iterate until the destination cursor reaches DestEnd.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers; their
  // first incoming value is the array start coming from the entry block.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element inside its own cleanups
  // scope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE(review): the value name below says "dest" although this is the
    // source cursor; it is only an IR value name.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whatever block the body ended in.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue of the shared reduction item \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit the non-lower-bound lvalue of \p E when it is an array section;
/// returns a default-constructed LValue for every other kind of expression.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Emit element-wise initialization of the private array copy of reduction
/// item \p N.
///
/// The 'declare reduction' path is used when \p DRD is non-null and either has
/// an initializer or the private variable has no initializer of its own; in
/// that case the reduction operation of the item is passed as the init
/// expression, otherwise the private variable's own initializer is.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress(CGF));
}

/// Record one clause entry per shared expression, pairing it with the
/// private expression and reduction operation at the same position.
/// \p Privates and \p ReductionOps are walked in lockstep with \p Shareds, so
/// they must provide at least as many elements.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit and append the pair of lvalues for reduction item \p N: the shared
/// lvalue and, for array sections, the lvalue emitted with
/// IsLowerBound=false. Items must be processed in order, i.e. exactly \p N
/// pairs must exist already.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

/// Compute and record the size of reduction item \p N and, for variably
/// modified private types, bind the array size expression and emit the type.
///
/// Appends a (size in chars, element count) pair to Sizes; the element count
/// is nullptr when the private type is not variably modified.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Size is taken from the non-reference type of the shared lvalue.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = cast<llvm::PointerType>(
                       SharedAddresses[N].first.getPointer(CGF)->getType())
                       ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Array section: Size = (second pointer - first pointer) + 1, and the
    // size in chars is Size * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
                                     SharedAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Otherwise start from the type size and divide by the element size.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the opaque size expression of the private variable array type to the
  // computed Size while the variably modified type is emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the variably modified private type of reduction item \p N using a
/// caller-provided element count \p Size. For private types that are not
/// variably modified nothing is emitted and \p Size is expected to be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item \p N.
///
/// Both addresses are first retyped to the memory types of the private and
/// shared types. Then, in order of precedence:
///  - array-typed private variables are initialized element by element;
///  - if the reduction operation refers to a 'declare reduction' declaration
///    that has an initializer, or the private variable has no initializer, the
///    reduction initializer path is used;
///  - otherwise \p DefaultInit is invoked, and only if it returns false and the
///    private variable has a non-trivial initializer is that initializer
///    emitted into the private copy.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  // Rebuild the shared lvalue with the recorded shared type, base info and
  // TBAA info of the item.
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(CGF),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private type of reduction item \p N has a destruction
/// kind other than DK_none.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the private copy of reduction item \p N when its
/// type requires destruction; \p PrivateAddr is retyped to the private type's
/// memory type first.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  // needCleanups(N) re-derives the same destruction kind computed above.
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Load through the pointer/reference levels of \p BaseTy, starting at
/// \p BaseLV, until the type matches \p ElTy or is no longer a pointer or
/// reference. The resulting address is retyped to the memory type of \p ElTy
/// while the lvalue keeps the type, base info and TBAA info of the last loaded
/// level.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Wrap \p Addr in as many levels of indirection as \p BaseTy has pointer or
/// reference levels above \p ElTy.
///
/// One memory temporary is created per level; each temporary is stored into
/// the one created before it, \p Addr (cast to the innermost temporary's
/// element type) is stored into the last one, and the first one is returned.
/// When there is no such level, \p Addr is cast to \p BaseLVType and returned
/// with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();        // Most recently created temporary.
  Address TopTmp = Address::invalid();     // Temporary created just before it.
  Address MostTopTmp = Address::invalid(); // First (outermost) temporary.
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Find the variable underlying an array section or array subscript
/// expression.
///
/// Nested array sections and subscripts are peeled off (ignoring parentheses
/// and implicit casts) until the base expression is reached, which must be a
/// DeclRefExpr to a variable. \p DE is set to that DeclRefExpr.
/// \return The base variable, or nullptr (leaving \p DE untouched) when
/// \p Ref is neither an array section nor an array subscript.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Record the base declaration of reduction item \p N and return the address
/// through which the private copy is to be accessed.
///
/// For array sections and subscripts the private pointer is shifted by the
/// pointer difference between the beginning of the base variable and the
/// shared item, and the result is wrapped by castToBase() to match the base
/// variable's type. For any other item \p PrivateAddr is returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Adjustment = base begin - shared item begin.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress(CGF).getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF).getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  // Plain variable reference: the item itself is the base declaration.
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// Return true if the reduction operation of item \p N refers to a
/// 'declare reduction' declaration that has an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1212 return CGF.EmitLoadOfPointerLValue( 1213 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1214 getThreadIDVariable()->getType()->castAs<PointerType>()); 1215 } 1216 1217 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1218 if (!CGF.HaveInsertPoint()) 1219 return; 1220 // 1.2.2 OpenMP Language Terminology 1221 // Structured block - An executable statement with a single entry at the 1222 // top and a single exit at the bottom. 1223 // The point of exit cannot be a branch out of the structured block. 1224 // longjmp() and throw() must not violate the entry/exit criteria. 1225 CGF.EHStack.pushTerminate(); 1226 CodeGen(CGF); 1227 CGF.EHStack.popTerminate(); 1228 } 1229 1230 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1231 CodeGenFunction &CGF) { 1232 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1233 getThreadIDVariable()->getType(), 1234 AlignmentSource::Decl); 1235 } 1236 1237 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1238 QualType FieldTy) { 1239 auto *Field = FieldDecl::Create( 1240 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1241 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1242 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1243 Field->setAccess(AS_public); 1244 DC->addDecl(Field); 1245 return Field; 1246 } 1247 1248 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1249 StringRef Separator) 1250 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1251 OffloadEntriesInfoManager(CGM) { 1252 ASTContext &C = CGM.getContext(); 1253 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1254 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1255 RD->startDefinition(); 1256 // reserved_1 1257 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1258 // flags 1259 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 1260 // reserved_2 1261 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1262 // reserved_3 1263 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1264 // psource 1265 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1266 RD->completeDefinition(); 1267 IdentQTy = C.getRecordType(RD); 1268 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1269 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1270 1271 loadOffloadInfoMetadata(); 1272 } 1273 1274 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, 1275 const GlobalDecl &OldGD, 1276 llvm::GlobalValue *OrigAddr, 1277 bool IsForDefinition) { 1278 // Emit at least a definition for the aliasee if the the address of the 1279 // original function is requested. 1280 if (IsForDefinition || OrigAddr) 1281 (void)CGM.GetAddrOfGlobal(NewGD); 1282 StringRef NewMangledName = CGM.getMangledName(NewGD); 1283 llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); 1284 if (Addr && !Addr->isDeclaration()) { 1285 const auto *D = cast<FunctionDecl>(OldGD.getDecl()); 1286 const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD); 1287 llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); 1288 1289 // Create a reference to the named value. This ensures that it is emitted 1290 // if a deferred decl. 1291 llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); 1292 1293 // Create the new alias itself, but don't set a name yet. 
1294 auto *GA = 1295 llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); 1296 1297 if (OrigAddr) { 1298 assert(OrigAddr->isDeclaration() && "Expected declaration"); 1299 1300 GA->takeName(OrigAddr); 1301 OrigAddr->replaceAllUsesWith( 1302 llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); 1303 OrigAddr->eraseFromParent(); 1304 } else { 1305 GA->setName(CGM.getMangledName(OldGD)); 1306 } 1307 1308 // Set attributes which are particular to an alias; this is a 1309 // specialization of the attributes which may be set on a global function. 1310 if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || 1311 D->isWeakImported()) 1312 GA->setLinkage(llvm::Function::WeakAnyLinkage); 1313 1314 CGM.SetCommonAttributes(OldGD, GA); 1315 return true; 1316 } 1317 return false; 1318 } 1319 1320 void CGOpenMPRuntime::clear() { 1321 InternalVars.clear(); 1322 // Clean non-target variable declarations possibly used only in debug info. 1323 for (const auto &Data : EmittedNonTargetVariables) { 1324 if (!Data.getValue().pointsToAliveValue()) 1325 continue; 1326 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1327 if (!GV) 1328 continue; 1329 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1330 continue; 1331 GV->eraseFromParent(); 1332 } 1333 // Emit aliases for the deferred aliasees. 1334 for (const auto &Pair : DeferredVariantFunction) { 1335 StringRef MangledName = CGM.getMangledName(Pair.second.second); 1336 llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); 1337 // If not able to emit alias, just emit original declaration. 
1338 (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, 1339 /*IsForDefinition=*/false); 1340 } 1341 } 1342 1343 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1344 SmallString<128> Buffer; 1345 llvm::raw_svector_ostream OS(Buffer); 1346 StringRef Sep = FirstSeparator; 1347 for (StringRef Part : Parts) { 1348 OS << Sep << Part; 1349 Sep = Separator; 1350 } 1351 return std::string(OS.str()); 1352 } 1353 1354 static llvm::Function * 1355 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1356 const Expr *CombinerInitializer, const VarDecl *In, 1357 const VarDecl *Out, bool IsCombiner) { 1358 // void .omp_combiner.(Ty *in, Ty *out); 1359 ASTContext &C = CGM.getContext(); 1360 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1361 FunctionArgList Args; 1362 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1363 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1364 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1365 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1366 Args.push_back(&OmpOutParm); 1367 Args.push_back(&OmpInParm); 1368 const CGFunctionInfo &FnInfo = 1369 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1370 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1371 std::string Name = CGM.getOpenMPRuntime().getName( 1372 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1373 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1374 Name, &CGM.getModule()); 1375 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1376 if (CGM.getLangOpts().Optimize) { 1377 Fn->removeFnAttr(llvm::Attribute::NoInline); 1378 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1379 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1380 } 1381 CodeGenFunction CGF(CGM); 1382 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1383 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
1384 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1385 Out->getLocation()); 1386 CodeGenFunction::OMPPrivateScope Scope(CGF); 1387 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1388 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1389 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1390 .getAddress(CGF); 1391 }); 1392 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1393 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1394 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1395 .getAddress(CGF); 1396 }); 1397 (void)Scope.Privatize(); 1398 if (!IsCombiner && Out->hasInit() && 1399 !CGF.isTrivialInitializer(Out->getInit())) { 1400 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1401 Out->getType().getQualifiers(), 1402 /*IsInitializer=*/true); 1403 } 1404 if (CombinerInitializer) 1405 CGF.EmitIgnoredExpr(CombinerInitializer); 1406 Scope.ForceCleanup(); 1407 CGF.FinishFunction(); 1408 return Fn; 1409 } 1410 1411 void CGOpenMPRuntime::emitUserDefinedReduction( 1412 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1413 if (UDRMap.count(D) > 0) 1414 return; 1415 llvm::Function *Combiner = emitCombinerOrInitializer( 1416 CGM, D->getType(), D->getCombiner(), 1417 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1418 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1419 /*IsCombiner=*/true); 1420 llvm::Function *Initializer = nullptr; 1421 if (const Expr *Init = D->getInitializer()) { 1422 Initializer = emitCombinerOrInitializer( 1423 CGM, D->getType(), 1424 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? 
Init 1425 : nullptr, 1426 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1427 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1428 /*IsCombiner=*/false); 1429 } 1430 UDRMap.try_emplace(D, Combiner, Initializer); 1431 if (CGF) { 1432 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1433 Decls.second.push_back(D); 1434 } 1435 } 1436 1437 std::pair<llvm::Function *, llvm::Function *> 1438 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1439 auto I = UDRMap.find(D); 1440 if (I != UDRMap.end()) 1441 return I->second; 1442 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1443 return UDRMap.lookup(D); 1444 } 1445 1446 namespace { 1447 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1448 // Builder if one is present. 1449 struct PushAndPopStackRAII { 1450 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1451 bool HasCancel) 1452 : OMPBuilder(OMPBuilder) { 1453 if (!OMPBuilder) 1454 return; 1455 1456 // The following callback is the crucial part of clangs cleanup process. 1457 // 1458 // NOTE: 1459 // Once the OpenMPIRBuilder is used to create parallel regions (and 1460 // similar), the cancellation destination (Dest below) is determined via 1461 // IP. That means if we have variables to finalize we split the block at IP, 1462 // use the new block (=BB) as destination to build a JumpDest (via 1463 // getJumpDestInCurrentScope(BB)) which then is fed to 1464 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1465 // to push & pop an FinalizationInfo object. 1466 // The FiniCB will still be needed but at the point where the 1467 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 
1468 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1469 assert(IP.getBlock()->end() == IP.getPoint() && 1470 "Clang CG should cause non-terminated block!"); 1471 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1472 CGF.Builder.restoreIP(IP); 1473 CodeGenFunction::JumpDest Dest = 1474 CGF.getOMPCancelDestination(OMPD_parallel); 1475 CGF.EmitBranchThroughCleanup(Dest); 1476 }; 1477 1478 // TODO: Remove this once we emit parallel regions through the 1479 // OpenMPIRBuilder as it can do this setup internally. 1480 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1481 {FiniCB, OMPD_parallel, HasCancel}); 1482 OMPBuilder->pushFinalizationCB(std::move(FI)); 1483 } 1484 ~PushAndPopStackRAII() { 1485 if (OMPBuilder) 1486 OMPBuilder->popFinalizationCB(); 1487 } 1488 llvm::OpenMPIRBuilder *OMPBuilder; 1489 }; 1490 } // namespace 1491 1492 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1493 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1494 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1495 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1496 assert(ThreadIDVar->getType()->isPointerType() && 1497 "thread id variable must be of type kmp_int32 *"); 1498 CodeGenFunction CGF(CGM, true); 1499 bool HasCancel = false; 1500 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1501 HasCancel = OPD->hasCancel(); 1502 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1503 HasCancel = OPSD->hasCancel(); 1504 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1505 HasCancel = OPFD->hasCancel(); 1506 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1507 HasCancel = OPFD->hasCancel(); 1508 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1509 HasCancel = OPFD->hasCancel(); 1510 else if (const auto *OPFD = 1511 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1512 HasCancel = OPFD->hasCancel(); 1513 
else if (const auto *OPFD = 1514 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1515 HasCancel = OPFD->hasCancel(); 1516 1517 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1518 // parallel region to make cancellation barriers work properly. 1519 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1520 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1521 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1522 HasCancel, OutlinedHelperName); 1523 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1524 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1525 } 1526 1527 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1528 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1529 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1530 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1531 return emitParallelOrTeamsOutlinedFunction( 1532 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1533 } 1534 1535 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1536 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1537 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1538 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1539 return emitParallelOrTeamsOutlinedFunction( 1540 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1541 } 1542 1543 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1544 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1545 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1546 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1547 bool Tied, unsigned &NumberOfParts) { 1548 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1549 PrePostActionTy &) { 1550 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1551 llvm::Value *UpLoc = 
emitUpdateLocation(CGF, D.getBeginLoc()); 1552 llvm::Value *TaskArgs[] = { 1553 UpLoc, ThreadID, 1554 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1555 TaskTVar->getType()->castAs<PointerType>()) 1556 .getPointer(CGF)}; 1557 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1558 }; 1559 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1560 UntiedCodeGen); 1561 CodeGen.setAction(Action); 1562 assert(!ThreadIDVar->getType()->isPointerType() && 1563 "thread id variable must be of type kmp_int32 for tasks"); 1564 const OpenMPDirectiveKind Region = 1565 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1566 : OMPD_task; 1567 const CapturedStmt *CS = D.getCapturedStmt(Region); 1568 bool HasCancel = false; 1569 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1570 HasCancel = TD->hasCancel(); 1571 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1572 HasCancel = TD->hasCancel(); 1573 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1574 HasCancel = TD->hasCancel(); 1575 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1576 HasCancel = TD->hasCancel(); 1577 1578 CodeGenFunction CGF(CGM, true); 1579 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1580 InnermostKind, HasCancel, Action); 1581 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1582 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1583 if (!Tied) 1584 NumberOfParts = Action.getNumberOfParts(); 1585 return Res; 1586 } 1587 1588 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1589 const RecordDecl *RD, const CGRecordLayout &RL, 1590 ArrayRef<llvm::Constant *> Data) { 1591 llvm::StructType *StructTy = RL.getLLVMType(); 1592 unsigned PrevIdx = 0; 1593 ConstantInitBuilder CIBuilder(CGM); 1594 auto DI = Data.begin(); 1595 for (const FieldDecl *FD : RD->fields()) { 1596 unsigned Idx = 
RL.getLLVMFieldNo(FD); 1597 // Fill the alignment. 1598 for (unsigned I = PrevIdx; I < Idx; ++I) 1599 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1600 PrevIdx = Idx + 1; 1601 Fields.add(*DI); 1602 ++DI; 1603 } 1604 } 1605 1606 template <class... As> 1607 static llvm::GlobalVariable * 1608 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1609 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1610 As &&... Args) { 1611 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1612 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1613 ConstantInitBuilder CIBuilder(CGM); 1614 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1615 buildStructValue(Fields, CGM, RD, RL, Data); 1616 return Fields.finishAndCreateGlobal( 1617 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1618 std::forward<As>(Args)...); 1619 } 1620 1621 template <typename T> 1622 static void 1623 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1624 ArrayRef<llvm::Constant *> Data, 1625 T &Parent) { 1626 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1627 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1628 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1629 buildStructValue(Fields, CGM, RD, RL, Data); 1630 Fields.finishAndAddTo(Parent); 1631 } 1632 1633 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1634 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1635 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1636 FlagsTy FlagsKey(Flags, Reserved2Flags); 1637 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1638 if (!Entry) { 1639 if (!DefaultOpenMPPSource) { 1640 // Initialize default location for psource field of ident_t structure of 1641 // all ident_t objects. Format is ";file;function;line;column;;". 
1642 // Taken from 1643 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1644 DefaultOpenMPPSource = 1645 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1646 DefaultOpenMPPSource = 1647 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1648 } 1649 1650 llvm::Constant *Data[] = { 1651 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1652 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1653 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1654 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1655 llvm::GlobalValue *DefaultOpenMPLocation = 1656 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1657 llvm::GlobalValue::PrivateLinkage); 1658 DefaultOpenMPLocation->setUnnamedAddr( 1659 llvm::GlobalValue::UnnamedAddr::Global); 1660 1661 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1662 } 1663 return Address(Entry, Align); 1664 } 1665 1666 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1667 bool AtCurrentPoint) { 1668 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1669 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1670 1671 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1672 if (AtCurrentPoint) { 1673 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1674 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1675 } else { 1676 Elem.second.ServiceInsertPt = 1677 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1678 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1679 } 1680 } 1681 1682 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1683 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1684 if (Elem.second.ServiceInsertPt) { 1685 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1686 Elem.second.ServiceInsertPt = nullptr; 1687 Ptr->eraseFromParent(); 1688 } 1689 } 1690 1691 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1692 SourceLocation Loc, 1693 unsigned Flags) { 1694 Flags |= OMP_IDENT_KMPC; 1695 // If no debug info is generated - return global default location. 1696 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1697 Loc.isInvalid()) 1698 return getOrCreateDefaultLocation(Flags).getPointer(); 1699 1700 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1701 1702 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1703 Address LocValue = Address::invalid(); 1704 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1705 if (I != OpenMPLocThreadIDMap.end()) 1706 LocValue = Address(I->second.DebugLoc, Align); 1707 1708 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1709 // GetOpenMPThreadID was called before this routine. 1710 if (!LocValue.isValid()) { 1711 // Generate "ident_t .kmpc_loc.addr;" 1712 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1713 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1714 Elem.second.DebugLoc = AI.getPointer(); 1715 LocValue = AI; 1716 1717 if (!Elem.second.ServiceInsertPt) 1718 setLocThreadIdInsertPt(CGF); 1719 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1720 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1721 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1722 CGF.getTypeSize(IdentQTy)); 1723 } 1724 1725 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1726 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1727 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1728 LValue PSource = 1729 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1730 1731 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1732 if (OMPDebugLoc == nullptr) { 1733 SmallString<128> Buffer2; 1734 llvm::raw_svector_ostream OS2(Buffer2); 1735 // Build debug location 1736 PresumedLoc PLoc = 
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1737 OS2 << ";" << PLoc.getFilename() << ";"; 1738 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1739 OS2 << FD->getQualifiedNameAsString(); 1740 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1741 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1742 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1743 } 1744 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1745 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1746 1747 // Our callers always pass this to a runtime function, so for 1748 // convenience, go ahead and return a naked pointer. 1749 return LocValue.getPointer(); 1750 } 1751 1752 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1753 SourceLocation Loc) { 1754 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1755 1756 llvm::Value *ThreadID = nullptr; 1757 // Check whether we've already cached a load of the thread id in this 1758 // function. 1759 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1760 if (I != OpenMPLocThreadIDMap.end()) { 1761 ThreadID = I->second.ThreadID; 1762 if (ThreadID != nullptr) 1763 return ThreadID; 1764 } 1765 // If exceptions are enabled, do not use parameter to avoid possible crash. 1766 if (auto *OMPRegionInfo = 1767 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1768 if (OMPRegionInfo->getThreadIDVariable()) { 1769 // Check if this an outlined function with thread id passed as argument. 
1770 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1771 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1772 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1773 !CGF.getLangOpts().CXXExceptions || 1774 CGF.Builder.GetInsertBlock() == TopBlock || 1775 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1776 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1777 TopBlock || 1778 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1779 CGF.Builder.GetInsertBlock()) { 1780 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1781 // If value loaded in entry block, cache it and use it everywhere in 1782 // function. 1783 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1784 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1785 Elem.second.ThreadID = ThreadID; 1786 } 1787 return ThreadID; 1788 } 1789 } 1790 } 1791 1792 // This is not an outlined function region - need to call __kmpc_int32 1793 // kmpc_global_thread_num(ident_t *loc). 1794 // Generate thread id value and cache this value for use across the 1795 // function. 
1796 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1797 if (!Elem.second.ServiceInsertPt) 1798 setLocThreadIdInsertPt(CGF); 1799 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1800 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1801 llvm::CallInst *Call = CGF.Builder.CreateCall( 1802 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1803 emitUpdateLocation(CGF, Loc)); 1804 Call->setCallingConv(CGF.getRuntimeCC()); 1805 Elem.second.ThreadID = Call; 1806 return Call; 1807 } 1808 1809 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1810 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1811 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1812 clearLocThreadIdInsertPt(CGF); 1813 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1814 } 1815 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1816 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1817 UDRMap.erase(D); 1818 FunctionUDRMap.erase(CGF.CurFn); 1819 } 1820 auto I = FunctionUDMMap.find(CGF.CurFn); 1821 if (I != FunctionUDMMap.end()) { 1822 for(const auto *D : I->second) 1823 UDMMap.erase(D); 1824 FunctionUDMMap.erase(I); 1825 } 1826 LastprivateConditionalToTypes.erase(CGF.CurFn); 1827 } 1828 1829 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1830 return IdentTy->getPointerTo(); 1831 } 1832 1833 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1834 if (!Kmpc_MicroTy) { 1835 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1836 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1837 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1838 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1839 } 1840 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1841 } 1842 1843 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1844 llvm::FunctionCallee RTLFn = nullptr; 1845 switch (static_cast<OpenMPRTLFunction>(Function)) { 1846 case OMPRTL__kmpc_fork_call: { 1847 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1848 // microtask, ...); 1849 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1850 getKmpc_MicroPointerTy()}; 1851 auto *FnTy = 1852 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1853 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1854 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1855 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1856 llvm::LLVMContext &Ctx = F->getContext(); 1857 llvm::MDBuilder MDB(Ctx); 1858 // Annotate the callback behavior of the __kmpc_fork_call: 1859 // - The callback callee is argument number 2 (microtask). 1860 // - The first two arguments of the callback callee are unknown (-1). 1861 // - All variadic arguments to the __kmpc_fork_call are passed to the 1862 // callback callee. 
1863 F->addMetadata( 1864 llvm::LLVMContext::MD_callback, 1865 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1866 2, {-1, -1}, 1867 /* VarArgsArePassed */ true)})); 1868 } 1869 } 1870 break; 1871 } 1872 case OMPRTL__kmpc_global_thread_num: { 1873 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1874 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1875 auto *FnTy = 1876 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1877 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1878 break; 1879 } 1880 case OMPRTL__kmpc_threadprivate_cached: { 1881 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1882 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1883 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1884 CGM.VoidPtrTy, CGM.SizeTy, 1885 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1886 auto *FnTy = 1887 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1888 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1889 break; 1890 } 1891 case OMPRTL__kmpc_critical: { 1892 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1893 // kmp_critical_name *crit); 1894 llvm::Type *TypeParams[] = { 1895 getIdentTyPointerTy(), CGM.Int32Ty, 1896 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1897 auto *FnTy = 1898 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1899 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1900 break; 1901 } 1902 case OMPRTL__kmpc_critical_with_hint: { 1903 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1904 // kmp_critical_name *crit, uintptr_t hint); 1905 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1906 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1907 CGM.IntPtrTy}; 1908 auto *FnTy = 1909 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1910 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1911 break; 1912 } 1913 case OMPRTL__kmpc_threadprivate_register: { 1914 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1915 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1916 // typedef void *(*kmpc_ctor)(void *); 1917 auto *KmpcCtorTy = 1918 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1919 /*isVarArg*/ false)->getPointerTo(); 1920 // typedef void *(*kmpc_cctor)(void *, void *); 1921 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1922 auto *KmpcCopyCtorTy = 1923 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1924 /*isVarArg*/ false) 1925 ->getPointerTo(); 1926 // typedef void (*kmpc_dtor)(void *); 1927 auto *KmpcDtorTy = 1928 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1929 ->getPointerTo(); 1930 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1931 KmpcCopyCtorTy, KmpcDtorTy}; 1932 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1933 /*isVarArg*/ false); 1934 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1935 break; 1936 } 1937 case OMPRTL__kmpc_end_critical: { 1938 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1939 // kmp_critical_name *crit); 1940 llvm::Type *TypeParams[] = { 1941 getIdentTyPointerTy(), CGM.Int32Ty, 1942 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1943 auto *FnTy = 1944 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1945 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1946 break; 1947 } 1948 case OMPRTL__kmpc_cancel_barrier: { 1949 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1950 // global_tid); 1951 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1952 auto *FnTy = 1953 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1954 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1955 break; 1956 } 1957 case 
OMPRTL__kmpc_barrier: { 1958 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1959 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1960 auto *FnTy = 1961 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1962 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1963 break; 1964 } 1965 case OMPRTL__kmpc_for_static_fini: { 1966 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1967 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1968 auto *FnTy = 1969 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1970 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1971 break; 1972 } 1973 case OMPRTL__kmpc_push_num_threads: { 1974 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1975 // kmp_int32 num_threads) 1976 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1977 CGM.Int32Ty}; 1978 auto *FnTy = 1979 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1980 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1981 break; 1982 } 1983 case OMPRTL__kmpc_serialized_parallel: { 1984 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1985 // global_tid); 1986 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1987 auto *FnTy = 1988 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1989 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1990 break; 1991 } 1992 case OMPRTL__kmpc_end_serialized_parallel: { 1993 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1994 // global_tid); 1995 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1996 auto *FnTy = 1997 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1998 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1999 break; 2000 } 2001 case OMPRTL__kmpc_flush: { 2002 // Build void 
__kmpc_flush(ident_t *loc); 2003 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 2004 auto *FnTy = 2005 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2006 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 2007 break; 2008 } 2009 case OMPRTL__kmpc_master: { 2010 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 2011 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2012 auto *FnTy = 2013 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2014 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 2015 break; 2016 } 2017 case OMPRTL__kmpc_end_master: { 2018 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 2019 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2020 auto *FnTy = 2021 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2022 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 2023 break; 2024 } 2025 case OMPRTL__kmpc_omp_taskyield: { 2026 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 2027 // int end_part); 2028 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2029 auto *FnTy = 2030 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2031 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 2032 break; 2033 } 2034 case OMPRTL__kmpc_single: { 2035 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 2036 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2037 auto *FnTy = 2038 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2039 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 2040 break; 2041 } 2042 case OMPRTL__kmpc_end_single: { 2043 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 2044 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2045 auto *FnTy = 2046 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2047 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 2048 break; 2049 } 2050 case OMPRTL__kmpc_omp_task_alloc: { 2051 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2052 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2053 // kmp_routine_entry_t *task_entry); 2054 assert(KmpRoutineEntryPtrTy != nullptr && 2055 "Type kmp_routine_entry_t must be created."); 2056 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2057 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2058 // Return void * and then cast to particular kmp_task_t type. 2059 auto *FnTy = 2060 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2061 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2062 break; 2063 } 2064 case OMPRTL__kmpc_omp_target_task_alloc: { 2065 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2066 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2067 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2068 assert(KmpRoutineEntryPtrTy != nullptr && 2069 "Type kmp_routine_entry_t must be created."); 2070 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2071 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2072 CGM.Int64Ty}; 2073 // Return void * and then cast to particular kmp_task_t type. 
2074 auto *FnTy = 2075 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2076 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2077 break; 2078 } 2079 case OMPRTL__kmpc_omp_task: { 2080 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2081 // *new_task); 2082 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2083 CGM.VoidPtrTy}; 2084 auto *FnTy = 2085 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2086 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2087 break; 2088 } 2089 case OMPRTL__kmpc_copyprivate: { 2090 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2091 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2092 // kmp_int32 didit); 2093 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2094 auto *CpyFnTy = 2095 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2096 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2097 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2098 CGM.Int32Ty}; 2099 auto *FnTy = 2100 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2101 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2102 break; 2103 } 2104 case OMPRTL__kmpc_reduce: { 2105 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2106 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2107 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2108 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2109 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2110 /*isVarArg=*/false); 2111 llvm::Type *TypeParams[] = { 2112 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2113 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2114 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2115 auto *FnTy = 2116 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2117 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2118 break; 2119 } 2120 case OMPRTL__kmpc_reduce_nowait: { 2121 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2122 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2123 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2124 // *lck); 2125 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2126 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2127 /*isVarArg=*/false); 2128 llvm::Type *TypeParams[] = { 2129 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2130 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2131 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2132 auto *FnTy = 2133 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2134 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2135 break; 2136 } 2137 case OMPRTL__kmpc_end_reduce: { 2138 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2139 // kmp_critical_name *lck); 2140 llvm::Type *TypeParams[] = { 2141 getIdentTyPointerTy(), CGM.Int32Ty, 2142 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2143 auto *FnTy = 2144 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2145 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2146 break; 2147 } 2148 case OMPRTL__kmpc_end_reduce_nowait: { 2149 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2150 // kmp_critical_name *lck); 2151 llvm::Type *TypeParams[] = { 2152 getIdentTyPointerTy(), CGM.Int32Ty, 2153 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2154 auto *FnTy = 2155 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2156 RTLFn = 2157 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2158 break; 2159 } 2160 case OMPRTL__kmpc_omp_task_begin_if0: { 
2161 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2162 // *new_task); 2163 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2164 CGM.VoidPtrTy}; 2165 auto *FnTy = 2166 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2167 RTLFn = 2168 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2169 break; 2170 } 2171 case OMPRTL__kmpc_omp_task_complete_if0: { 2172 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2173 // *new_task); 2174 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2175 CGM.VoidPtrTy}; 2176 auto *FnTy = 2177 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2178 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2179 /*Name=*/"__kmpc_omp_task_complete_if0"); 2180 break; 2181 } 2182 case OMPRTL__kmpc_ordered: { 2183 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2184 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2185 auto *FnTy = 2186 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2187 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2188 break; 2189 } 2190 case OMPRTL__kmpc_end_ordered: { 2191 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2192 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2193 auto *FnTy = 2194 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2195 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2196 break; 2197 } 2198 case OMPRTL__kmpc_omp_taskwait: { 2199 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2200 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2201 auto *FnTy = 2202 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2203 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2204 break; 2205 } 2206 case OMPRTL__kmpc_taskgroup: { 2207 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2208 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2209 auto *FnTy = 2210 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2211 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2212 break; 2213 } 2214 case OMPRTL__kmpc_end_taskgroup: { 2215 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2216 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2217 auto *FnTy = 2218 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2219 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2220 break; 2221 } 2222 case OMPRTL__kmpc_push_proc_bind: { 2223 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2224 // int proc_bind) 2225 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2226 auto *FnTy = 2227 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2228 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2229 break; 2230 } 2231 case OMPRTL__kmpc_omp_task_with_deps: { 2232 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2233 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2234 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2235 llvm::Type *TypeParams[] = { 2236 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2237 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2238 auto *FnTy = 2239 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2240 RTLFn = 2241 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2242 break; 2243 } 2244 case OMPRTL__kmpc_omp_wait_deps: { 2245 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2246 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2247 // kmp_depend_info_t *noalias_dep_list); 2248 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2249 CGM.Int32Ty, CGM.VoidPtrTy, 2250 
CGM.Int32Ty, CGM.VoidPtrTy}; 2251 auto *FnTy = 2252 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2253 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2254 break; 2255 } 2256 case OMPRTL__kmpc_cancellationpoint: { 2257 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2258 // global_tid, kmp_int32 cncl_kind) 2259 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2260 auto *FnTy = 2261 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2262 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2263 break; 2264 } 2265 case OMPRTL__kmpc_cancel: { 2266 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2267 // kmp_int32 cncl_kind) 2268 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2269 auto *FnTy = 2270 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2271 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2272 break; 2273 } 2274 case OMPRTL__kmpc_push_num_teams: { 2275 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2276 // kmp_int32 num_teams, kmp_int32 num_threads) 2277 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2278 CGM.Int32Ty}; 2279 auto *FnTy = 2280 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2281 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2282 break; 2283 } 2284 case OMPRTL__kmpc_fork_teams: { 2285 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2286 // microtask, ...); 2287 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2288 getKmpc_MicroPointerTy()}; 2289 auto *FnTy = 2290 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2291 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2292 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2293 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2294 llvm::LLVMContext &Ctx = F->getContext(); 2295 llvm::MDBuilder MDB(Ctx); 2296 // Annotate the callback behavior of the __kmpc_fork_teams: 2297 // - The callback callee is argument number 2 (microtask). 2298 // - The first two arguments of the callback callee are unknown (-1). 2299 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2300 // callback callee. 2301 F->addMetadata( 2302 llvm::LLVMContext::MD_callback, 2303 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2304 2, {-1, -1}, 2305 /* VarArgsArePassed */ true)})); 2306 } 2307 } 2308 break; 2309 } 2310 case OMPRTL__kmpc_taskloop: { 2311 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2312 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2313 // sched, kmp_uint64 grainsize, void *task_dup); 2314 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2315 CGM.IntTy, 2316 CGM.VoidPtrTy, 2317 CGM.IntTy, 2318 CGM.Int64Ty->getPointerTo(), 2319 CGM.Int64Ty->getPointerTo(), 2320 CGM.Int64Ty, 2321 CGM.IntTy, 2322 CGM.IntTy, 2323 CGM.Int64Ty, 2324 CGM.VoidPtrTy}; 2325 auto *FnTy = 2326 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2327 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2328 break; 2329 } 2330 case OMPRTL__kmpc_doacross_init: { 2331 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2332 // num_dims, struct kmp_dim *dims); 2333 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2334 CGM.Int32Ty, 2335 CGM.Int32Ty, 2336 CGM.VoidPtrTy}; 2337 auto *FnTy = 2338 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2339 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2340 break; 2341 } 2342 case OMPRTL__kmpc_doacross_fini: { 2343 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2344 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2345 auto *FnTy 
= 2346 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2347 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2348 break; 2349 } 2350 case OMPRTL__kmpc_doacross_post: { 2351 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2352 // *vec); 2353 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2354 CGM.Int64Ty->getPointerTo()}; 2355 auto *FnTy = 2356 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2357 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2358 break; 2359 } 2360 case OMPRTL__kmpc_doacross_wait: { 2361 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2362 // *vec); 2363 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2364 CGM.Int64Ty->getPointerTo()}; 2365 auto *FnTy = 2366 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2367 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2368 break; 2369 } 2370 case OMPRTL__kmpc_task_reduction_init: { 2371 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2372 // *data); 2373 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2374 auto *FnTy = 2375 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2376 RTLFn = 2377 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2378 break; 2379 } 2380 case OMPRTL__kmpc_task_reduction_get_th_data: { 2381 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2382 // *d); 2383 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2384 auto *FnTy = 2385 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2386 RTLFn = CGM.CreateRuntimeFunction( 2387 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2388 break; 2389 } 2390 case OMPRTL__kmpc_alloc: { 2391 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2392 // 
al); omp_allocator_handle_t type is void *. 2393 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2394 auto *FnTy = 2395 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2396 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2397 break; 2398 } 2399 case OMPRTL__kmpc_free: { 2400 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2401 // al); omp_allocator_handle_t type is void *. 2402 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2403 auto *FnTy = 2404 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2405 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2406 break; 2407 } 2408 case OMPRTL__kmpc_push_target_tripcount: { 2409 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2410 // size); 2411 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2412 llvm::FunctionType *FnTy = 2413 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2414 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2415 break; 2416 } 2417 case OMPRTL__tgt_target: { 2418 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2419 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2420 // *arg_types); 2421 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2422 CGM.VoidPtrTy, 2423 CGM.Int32Ty, 2424 CGM.VoidPtrPtrTy, 2425 CGM.VoidPtrPtrTy, 2426 CGM.Int64Ty->getPointerTo(), 2427 CGM.Int64Ty->getPointerTo()}; 2428 auto *FnTy = 2429 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2430 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2431 break; 2432 } 2433 case OMPRTL__tgt_target_nowait: { 2434 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2435 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2436 // int64_t *arg_types); 2437 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2438 CGM.VoidPtrTy, 2439 
CGM.Int32Ty, 2440 CGM.VoidPtrPtrTy, 2441 CGM.VoidPtrPtrTy, 2442 CGM.Int64Ty->getPointerTo(), 2443 CGM.Int64Ty->getPointerTo()}; 2444 auto *FnTy = 2445 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2446 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2447 break; 2448 } 2449 case OMPRTL__tgt_target_teams: { 2450 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2451 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2452 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2453 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2454 CGM.VoidPtrTy, 2455 CGM.Int32Ty, 2456 CGM.VoidPtrPtrTy, 2457 CGM.VoidPtrPtrTy, 2458 CGM.Int64Ty->getPointerTo(), 2459 CGM.Int64Ty->getPointerTo(), 2460 CGM.Int32Ty, 2461 CGM.Int32Ty}; 2462 auto *FnTy = 2463 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2464 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2465 break; 2466 } 2467 case OMPRTL__tgt_target_teams_nowait: { 2468 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2469 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2470 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2471 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2472 CGM.VoidPtrTy, 2473 CGM.Int32Ty, 2474 CGM.VoidPtrPtrTy, 2475 CGM.VoidPtrPtrTy, 2476 CGM.Int64Ty->getPointerTo(), 2477 CGM.Int64Ty->getPointerTo(), 2478 CGM.Int32Ty, 2479 CGM.Int32Ty}; 2480 auto *FnTy = 2481 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2482 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2483 break; 2484 } 2485 case OMPRTL__tgt_register_requires: { 2486 // Build void __tgt_register_requires(int64_t flags); 2487 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2488 auto *FnTy = 2489 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2490 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2491 
break; 2492 } 2493 case OMPRTL__tgt_target_data_begin: { 2494 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2495 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2496 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2497 CGM.Int32Ty, 2498 CGM.VoidPtrPtrTy, 2499 CGM.VoidPtrPtrTy, 2500 CGM.Int64Ty->getPointerTo(), 2501 CGM.Int64Ty->getPointerTo()}; 2502 auto *FnTy = 2503 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2504 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2505 break; 2506 } 2507 case OMPRTL__tgt_target_data_begin_nowait: { 2508 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2509 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2510 // *arg_types); 2511 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2512 CGM.Int32Ty, 2513 CGM.VoidPtrPtrTy, 2514 CGM.VoidPtrPtrTy, 2515 CGM.Int64Ty->getPointerTo(), 2516 CGM.Int64Ty->getPointerTo()}; 2517 auto *FnTy = 2518 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2519 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2520 break; 2521 } 2522 case OMPRTL__tgt_target_data_end: { 2523 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2524 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2525 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2526 CGM.Int32Ty, 2527 CGM.VoidPtrPtrTy, 2528 CGM.VoidPtrPtrTy, 2529 CGM.Int64Ty->getPointerTo(), 2530 CGM.Int64Ty->getPointerTo()}; 2531 auto *FnTy = 2532 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2533 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2534 break; 2535 } 2536 case OMPRTL__tgt_target_data_end_nowait: { 2537 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2538 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2539 // *arg_types); 2540 llvm::Type *TypeParams[] = {CGM.Int64Ty, 
2541 CGM.Int32Ty, 2542 CGM.VoidPtrPtrTy, 2543 CGM.VoidPtrPtrTy, 2544 CGM.Int64Ty->getPointerTo(), 2545 CGM.Int64Ty->getPointerTo()}; 2546 auto *FnTy = 2547 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2548 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2549 break; 2550 } 2551 case OMPRTL__tgt_target_data_update: { 2552 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2553 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2554 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2555 CGM.Int32Ty, 2556 CGM.VoidPtrPtrTy, 2557 CGM.VoidPtrPtrTy, 2558 CGM.Int64Ty->getPointerTo(), 2559 CGM.Int64Ty->getPointerTo()}; 2560 auto *FnTy = 2561 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2562 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2563 break; 2564 } 2565 case OMPRTL__tgt_target_data_update_nowait: { 2566 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2567 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2568 // *arg_types); 2569 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2570 CGM.Int32Ty, 2571 CGM.VoidPtrPtrTy, 2572 CGM.VoidPtrPtrTy, 2573 CGM.Int64Ty->getPointerTo(), 2574 CGM.Int64Ty->getPointerTo()}; 2575 auto *FnTy = 2576 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2577 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2578 break; 2579 } 2580 case OMPRTL__tgt_mapper_num_components: { 2581 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2582 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2583 auto *FnTy = 2584 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2585 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2586 break; 2587 } 2588 case OMPRTL__tgt_push_mapper_component: { 2589 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2590 // *base, void *begin, 
int64_t size, int64_t type); 2591 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2592 CGM.Int64Ty, CGM.Int64Ty}; 2593 auto *FnTy = 2594 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2595 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2596 break; 2597 } 2598 } 2599 assert(RTLFn && "Unable to find OpenMP runtime function"); 2600 return RTLFn; 2601 } 2602 2603 llvm::FunctionCallee 2604 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2605 assert((IVSize == 32 || IVSize == 64) && 2606 "IV size is not compatible with the omp runtime"); 2607 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2608 : "__kmpc_for_static_init_4u") 2609 : (IVSigned ? "__kmpc_for_static_init_8" 2610 : "__kmpc_for_static_init_8u"); 2611 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2612 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2613 llvm::Type *TypeParams[] = { 2614 getIdentTyPointerTy(), // loc 2615 CGM.Int32Ty, // tid 2616 CGM.Int32Ty, // schedtype 2617 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2618 PtrTy, // p_lower 2619 PtrTy, // p_upper 2620 PtrTy, // p_stride 2621 ITy, // incr 2622 ITy // chunk 2623 }; 2624 auto *FnTy = 2625 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2626 return CGM.CreateRuntimeFunction(FnTy, Name); 2627 } 2628 2629 llvm::FunctionCallee 2630 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2631 assert((IVSize == 32 || IVSize == 64) && 2632 "IV size is not compatible with the omp runtime"); 2633 StringRef Name = 2634 IVSize == 32 2635 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2636 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2637 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2638 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2639 CGM.Int32Ty, // tid 2640 CGM.Int32Ty, // schedtype 2641 ITy, // lower 2642 ITy, // upper 2643 ITy, // stride 2644 ITy // chunk 2645 }; 2646 auto *FnTy = 2647 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2648 return CGM.CreateRuntimeFunction(FnTy, Name); 2649 } 2650 2651 llvm::FunctionCallee 2652 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2653 assert((IVSize == 32 || IVSize == 64) && 2654 "IV size is not compatible with the omp runtime"); 2655 StringRef Name = 2656 IVSize == 32 2657 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2658 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2659 llvm::Type *TypeParams[] = { 2660 getIdentTyPointerTy(), // loc 2661 CGM.Int32Ty, // tid 2662 }; 2663 auto *FnTy = 2664 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2665 return CGM.CreateRuntimeFunction(FnTy, Name); 2666 } 2667 2668 llvm::FunctionCallee 2669 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2670 assert((IVSize == 32 || IVSize == 64) && 2671 "IV size is not compatible with the omp runtime"); 2672 StringRef Name = 2673 IVSize == 32 2674 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2675 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2676 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2677 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2678 llvm::Type *TypeParams[] = { 2679 getIdentTyPointerTy(), // loc 2680 CGM.Int32Ty, // tid 2681 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2682 PtrTy, // p_lower 2683 PtrTy, // p_upper 2684 PtrTy // p_stride 2685 }; 2686 auto *FnTy = 2687 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2688 return CGM.CreateRuntimeFunction(FnTy, Name); 2689 } 2690 2691 /// Obtain information that uniquely identifies a target entry. This 2692 /// consists of the file and device IDs as well as line number associated with 2693 /// the relevant entry source location. 2694 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2695 unsigned &DeviceID, unsigned &FileID, 2696 unsigned &LineNum) { 2697 SourceManager &SM = C.getSourceManager(); 2698 2699 // The loc should be always valid and have a file ID (the user cannot use 2700 // #pragma directives in macros) 2701 2702 assert(Loc.isValid() && "Source location is expected to be always valid."); 2703 2704 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2705 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2706 2707 llvm::sys::fs::UniqueID ID; 2708 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2709 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2710 << PLoc.getFilename() << EC.message(); 2711 2712 DeviceID = ID.getDevice(); 2713 FileID = ID.getFile(); 2714 LineNum = PLoc.getLine(); 2715 } 2716 2717 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2718 if (CGM.getLangOpts().OpenMPSimd) 2719 return Address::invalid(); 2720 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2721 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2722 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2723 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2724 HasRequiresUnifiedSharedMemory))) { 2725 SmallString<64> PtrName; 2726 { 2727 
llvm::raw_svector_ostream OS(PtrName); 2728 OS << CGM.getMangledName(GlobalDecl(VD)); 2729 if (!VD->isExternallyVisible()) { 2730 unsigned DeviceID, FileID, Line; 2731 getTargetEntryUniqueInfo(CGM.getContext(), 2732 VD->getCanonicalDecl()->getBeginLoc(), 2733 DeviceID, FileID, Line); 2734 OS << llvm::format("_%x", FileID); 2735 } 2736 OS << "_decl_tgt_ref_ptr"; 2737 } 2738 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2739 if (!Ptr) { 2740 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2741 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2742 PtrName); 2743 2744 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2745 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2746 2747 if (!CGM.getLangOpts().OpenMPIsDevice) 2748 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2749 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2750 } 2751 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2752 } 2753 return Address::invalid(); 2754 } 2755 2756 llvm::Constant * 2757 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2758 assert(!CGM.getLangOpts().OpenMPUseTLS || 2759 !CGM.getContext().getTargetInfo().isTLSSupported()); 2760 // Lookup the entry, lazily creating it if necessary. 
2761 std::string Suffix = getName({"cache", ""}); 2762 return getOrCreateInternalVariable( 2763 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2764 } 2765 2766 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2767 const VarDecl *VD, 2768 Address VDAddr, 2769 SourceLocation Loc) { 2770 if (CGM.getLangOpts().OpenMPUseTLS && 2771 CGM.getContext().getTargetInfo().isTLSSupported()) 2772 return VDAddr; 2773 2774 llvm::Type *VarTy = VDAddr.getElementType(); 2775 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2776 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2777 CGM.Int8PtrTy), 2778 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2779 getOrCreateThreadPrivateCache(VD)}; 2780 return Address(CGF.EmitRuntimeCall( 2781 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2782 VDAddr.getAlignment()); 2783 } 2784 2785 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2786 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2787 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2788 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2789 // library. 2790 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2791 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2792 OMPLoc); 2793 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2794 // to register constructor/destructor for variable. 
2795 llvm::Value *Args[] = { 2796 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2797 Ctor, CopyCtor, Dtor}; 2798 CGF.EmitRuntimeCall( 2799 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2800 } 2801 2802 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2803 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2804 bool PerformInit, CodeGenFunction *CGF) { 2805 if (CGM.getLangOpts().OpenMPUseTLS && 2806 CGM.getContext().getTargetInfo().isTLSSupported()) 2807 return nullptr; 2808 2809 VD = VD->getDefinition(CGM.getContext()); 2810 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2811 QualType ASTTy = VD->getType(); 2812 2813 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2814 const Expr *Init = VD->getAnyInitializer(); 2815 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2816 // Generate function that re-emits the declaration's initializer into the 2817 // threadprivate copy of the variable VD 2818 CodeGenFunction CtorCGF(CGM); 2819 FunctionArgList Args; 2820 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2821 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2822 ImplicitParamDecl::Other); 2823 Args.push_back(&Dst); 2824 2825 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2826 CGM.getContext().VoidPtrTy, Args); 2827 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2828 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2829 llvm::Function *Fn = 2830 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2831 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2832 Args, Loc, Loc); 2833 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2834 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2835 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2836 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2837 Arg = CtorCGF.Builder.CreateElementBitCast( 2838 Arg, 
CtorCGF.ConvertTypeForMem(ASTTy)); 2839 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2840 /*IsInitializer=*/true); 2841 ArgVal = CtorCGF.EmitLoadOfScalar( 2842 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2843 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2844 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2845 CtorCGF.FinishFunction(); 2846 Ctor = Fn; 2847 } 2848 if (VD->getType().isDestructedType() != QualType::DK_none) { 2849 // Generate function that emits destructor call for the threadprivate copy 2850 // of the variable VD 2851 CodeGenFunction DtorCGF(CGM); 2852 FunctionArgList Args; 2853 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2854 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2855 ImplicitParamDecl::Other); 2856 Args.push_back(&Dst); 2857 2858 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2859 CGM.getContext().VoidTy, Args); 2860 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2861 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2862 llvm::Function *Fn = 2863 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2864 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2865 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2866 Loc, Loc); 2867 // Create a scope with an artificial location for the body of this function. 2868 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2869 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2870 DtorCGF.GetAddrOfLocalVar(&Dst), 2871 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2872 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2873 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2874 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2875 DtorCGF.FinishFunction(); 2876 Dtor = Fn; 2877 } 2878 // Do not emit init function if it is not required. 
2879 if (!Ctor && !Dtor) 2880 return nullptr; 2881 2882 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2883 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2884 /*isVarArg=*/false) 2885 ->getPointerTo(); 2886 // Copying constructor for the threadprivate variable. 2887 // Must be NULL - reserved by runtime, but currently it requires that this 2888 // parameter is always NULL. Otherwise it fires assertion. 2889 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2890 if (Ctor == nullptr) { 2891 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2892 /*isVarArg=*/false) 2893 ->getPointerTo(); 2894 Ctor = llvm::Constant::getNullValue(CtorTy); 2895 } 2896 if (Dtor == nullptr) { 2897 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2898 /*isVarArg=*/false) 2899 ->getPointerTo(); 2900 Dtor = llvm::Constant::getNullValue(DtorTy); 2901 } 2902 if (!CGF) { 2903 auto *InitFunctionTy = 2904 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2905 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2906 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2907 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2908 CodeGenFunction InitCGF(CGM); 2909 FunctionArgList ArgList; 2910 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2911 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2912 Loc, Loc); 2913 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2914 InitCGF.FinishFunction(); 2915 return InitFunction; 2916 } 2917 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2918 } 2919 return nullptr; 2920 } 2921 2922 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2923 llvm::GlobalVariable *Addr, 2924 bool PerformInit) { 2925 if (CGM.getLangOpts().OMPTargetTriples.empty() && 2926 !CGM.getLangOpts().OpenMPIsDevice) 2927 return false; 2928 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2929 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2930 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2931 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2932 HasRequiresUnifiedSharedMemory)) 2933 return CGM.getLangOpts().OpenMPIsDevice; 2934 VD = VD->getDefinition(CGM.getContext()); 2935 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2936 return CGM.getLangOpts().OpenMPIsDevice; 2937 2938 QualType ASTTy = VD->getType(); 2939 2940 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2941 // Produce the unique prefix to identify the new target regions. We use 2942 // the source location of the variable declaration which we know to not 2943 // conflict with any target region. 2944 unsigned DeviceID; 2945 unsigned FileID; 2946 unsigned Line; 2947 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2948 SmallString<128> Buffer, Out; 2949 { 2950 llvm::raw_svector_ostream OS(Buffer); 2951 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2952 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2953 } 2954 2955 const Expr *Init = VD->getAnyInitializer(); 2956 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2957 llvm::Constant *Ctor; 2958 llvm::Constant *ID; 2959 if (CGM.getLangOpts().OpenMPIsDevice) { 2960 // Generate function that re-emits the declaration's initializer into 2961 // the threadprivate copy of the variable VD 2962 CodeGenFunction CtorCGF(CGM); 2963 2964 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2965 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2966 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2967 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2968 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2969 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2970 FunctionArgList(), Loc, Loc); 2971 auto AL = 
ApplyDebugLocation::CreateArtificial(CtorCGF); 2972 CtorCGF.EmitAnyExprToMem(Init, 2973 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2974 Init->getType().getQualifiers(), 2975 /*IsInitializer=*/true); 2976 CtorCGF.FinishFunction(); 2977 Ctor = Fn; 2978 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2979 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2980 } else { 2981 Ctor = new llvm::GlobalVariable( 2982 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2983 llvm::GlobalValue::PrivateLinkage, 2984 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2985 ID = Ctor; 2986 } 2987 2988 // Register the information for the entry associated with the constructor. 2989 Out.clear(); 2990 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2991 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2992 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2993 } 2994 if (VD->getType().isDestructedType() != QualType::DK_none) { 2995 llvm::Constant *Dtor; 2996 llvm::Constant *ID; 2997 if (CGM.getLangOpts().OpenMPIsDevice) { 2998 // Generate function that emits destructor call for the threadprivate 2999 // copy of the variable VD 3000 CodeGenFunction DtorCGF(CGM); 3001 3002 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 3003 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3004 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 3005 FTy, Twine(Buffer, "_dtor"), FI, Loc); 3006 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 3007 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 3008 FunctionArgList(), Loc, Loc); 3009 // Create a scope with an artificial location for the body of this 3010 // function. 
3011 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 3012 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 3013 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 3014 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 3015 DtorCGF.FinishFunction(); 3016 Dtor = Fn; 3017 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 3018 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 3019 } else { 3020 Dtor = new llvm::GlobalVariable( 3021 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 3022 llvm::GlobalValue::PrivateLinkage, 3023 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 3024 ID = Dtor; 3025 } 3026 // Register the information for the entry associated with the destructor. 3027 Out.clear(); 3028 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 3029 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 3030 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 3031 } 3032 return CGM.getLangOpts().OpenMPIsDevice; 3033 } 3034 3035 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 3036 QualType VarType, 3037 StringRef Name) { 3038 std::string Suffix = getName({"artificial", ""}); 3039 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 3040 llvm::Value *GAddr = 3041 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 3042 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 3043 CGM.getTarget().isTLSSupported()) { 3044 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 3045 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 3046 } 3047 std::string CacheSuffix = getName({"cache", ""}); 3048 llvm::Value *Args[] = { 3049 emitUpdateLocation(CGF, SourceLocation()), 3050 getThreadID(CGF, SourceLocation()), 3051 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 3052 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 3053 /*isSigned=*/false), 3054 
getOrCreateInternalVariable( 3055 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 3056 return Address( 3057 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3058 CGF.EmitRuntimeCall( 3059 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 3060 VarLVType->getPointerTo(/*AddrSpace=*/0)), 3061 CGM.getContext().getTypeAlignInChars(VarType)); 3062 } 3063 3064 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 3065 const RegionCodeGenTy &ThenGen, 3066 const RegionCodeGenTy &ElseGen) { 3067 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 3068 3069 // If the condition constant folds and can be elided, try to avoid emitting 3070 // the condition and the dead arm of the if/else. 3071 bool CondConstant; 3072 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 3073 if (CondConstant) 3074 ThenGen(CGF); 3075 else 3076 ElseGen(CGF); 3077 return; 3078 } 3079 3080 // Otherwise, the condition did not fold, or we couldn't elide it. Just 3081 // emit the conditional branch. 3082 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3083 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 3084 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 3085 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 3086 3087 // Emit the 'then' code. 3088 CGF.EmitBlock(ThenBlock); 3089 ThenGen(CGF); 3090 CGF.EmitBranch(ContBlock); 3091 // Emit the 'else' code if present. 3092 // There is no need to emit line number for unconditional branch. 3093 (void)ApplyDebugLocation::CreateEmpty(CGF); 3094 CGF.EmitBlock(ElseBlock); 3095 ElseGen(CGF); 3096 // There is no need to emit line number for unconditional branch. 3097 (void)ApplyDebugLocation::CreateEmpty(CGF); 3098 CGF.EmitBranch(ContBlock); 3099 // Emit the continuation block for code after the if. 
3100 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 3101 } 3102 3103 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 3104 llvm::Function *OutlinedFn, 3105 ArrayRef<llvm::Value *> CapturedVars, 3106 const Expr *IfCond) { 3107 if (!CGF.HaveInsertPoint()) 3108 return; 3109 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 3110 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 3111 PrePostActionTy &) { 3112 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 3113 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3114 llvm::Value *Args[] = { 3115 RTLoc, 3116 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3117 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3118 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3119 RealArgs.append(std::begin(Args), std::end(Args)); 3120 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3121 3122 llvm::FunctionCallee RTLFn = 3123 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3124 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3125 }; 3126 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3127 PrePostActionTy &) { 3128 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3129 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3130 // Build calls: 3131 // __kmpc_serialized_parallel(&Loc, GTid); 3132 llvm::Value *Args[] = {RTLoc, ThreadID}; 3133 CGF.EmitRuntimeCall( 3134 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3135 3136 // OutlinedFn(>id, &zero_bound, CapturedStruct); 3137 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 3138 Address ZeroAddrBound = 3139 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3140 /*Name=*/".bound.zero.addr"); 3141 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 3142 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3143 // ThreadId for serialized parallels is 0. 
3144 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 3145 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 3146 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3147 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3148 3149 // __kmpc_end_serialized_parallel(&Loc, GTid); 3150 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3151 CGF.EmitRuntimeCall( 3152 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3153 EndArgs); 3154 }; 3155 if (IfCond) { 3156 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 3157 } else { 3158 RegionCodeGenTy ThenRCG(ThenGen); 3159 ThenRCG(CGF); 3160 } 3161 } 3162 3163 // If we're inside an (outlined) parallel region, use the region info's 3164 // thread-ID variable (it is passed in a first argument of the outlined function 3165 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3166 // regular serial code region, get thread ID by calling kmp_int32 3167 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3168 // return the address of that temp. 
3169 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3170 SourceLocation Loc) { 3171 if (auto *OMPRegionInfo = 3172 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3173 if (OMPRegionInfo->getThreadIDVariable()) 3174 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 3175 3176 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3177 QualType Int32Ty = 3178 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3179 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3180 CGF.EmitStoreOfScalar(ThreadID, 3181 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3182 3183 return ThreadIDTemp; 3184 } 3185 3186 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3187 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3188 SmallString<256> Buffer; 3189 llvm::raw_svector_ostream Out(Buffer); 3190 Out << Name; 3191 StringRef RuntimeName = Out.str(); 3192 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3193 if (Elem.second) { 3194 assert(Elem.second->getType()->getPointerElementType() == Ty && 3195 "OMP internal variable has different type than requested"); 3196 return &*Elem.second; 3197 } 3198 3199 return Elem.second = new llvm::GlobalVariable( 3200 CGM.getModule(), Ty, /*IsConstant*/ false, 3201 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3202 Elem.first(), /*InsertBefore=*/nullptr, 3203 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3204 } 3205 3206 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3207 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3208 std::string Name = getName({Prefix, "var"}); 3209 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3210 } 3211 3212 namespace { 3213 /// Common pre(post)-action for different OpenMP constructs. 
3214 class CommonActionTy final : public PrePostActionTy { 3215 llvm::FunctionCallee EnterCallee; 3216 ArrayRef<llvm::Value *> EnterArgs; 3217 llvm::FunctionCallee ExitCallee; 3218 ArrayRef<llvm::Value *> ExitArgs; 3219 bool Conditional; 3220 llvm::BasicBlock *ContBlock = nullptr; 3221 3222 public: 3223 CommonActionTy(llvm::FunctionCallee EnterCallee, 3224 ArrayRef<llvm::Value *> EnterArgs, 3225 llvm::FunctionCallee ExitCallee, 3226 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3227 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3228 ExitArgs(ExitArgs), Conditional(Conditional) {} 3229 void Enter(CodeGenFunction &CGF) override { 3230 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3231 if (Conditional) { 3232 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3233 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3234 ContBlock = CGF.createBasicBlock("omp_if.end"); 3235 // Generate the branch (If-stmt) 3236 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3237 CGF.EmitBlock(ThenBlock); 3238 } 3239 } 3240 void Done(CodeGenFunction &CGF) { 3241 // Emit the rest of blocks/branches 3242 CGF.EmitBranch(ContBlock); 3243 CGF.EmitBlock(ContBlock, true); 3244 } 3245 void Exit(CodeGenFunction &CGF) override { 3246 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3247 } 3248 }; 3249 } // anonymous namespace 3250 3251 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3252 StringRef CriticalName, 3253 const RegionCodeGenTy &CriticalOpGen, 3254 SourceLocation Loc, const Expr *Hint) { 3255 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3256 // CriticalOpGen(); 3257 // __kmpc_end_critical(ident_t *, gtid, Lock); 3258 // Prepare arguments and build a call to __kmpc_critical 3259 if (!CGF.HaveInsertPoint()) 3260 return; 3261 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3262 getCriticalRegionLock(CriticalName)}; 3263 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3264 std::end(Args)); 3265 if (Hint) { 3266 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3267 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3268 } 3269 CommonActionTy Action( 3270 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3271 : OMPRTL__kmpc_critical), 3272 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3273 CriticalOpGen.setAction(Action); 3274 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3275 } 3276 3277 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3278 const RegionCodeGenTy &MasterOpGen, 3279 SourceLocation Loc) { 3280 if (!CGF.HaveInsertPoint()) 3281 return; 3282 // if(__kmpc_master(ident_t *, gtid)) { 3283 // MasterOpGen(); 3284 // __kmpc_end_master(ident_t *, gtid); 3285 // } 3286 // Prepare arguments and build a call to __kmpc_master 3287 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3288 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3289 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3290 /*Conditional=*/true); 3291 MasterOpGen.setAction(Action); 3292 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3293 Action.Done(CGF); 3294 } 3295 3296 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3297 SourceLocation Loc) { 3298 if (!CGF.HaveInsertPoint()) 3299 return; 3300 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 3301 if (OMPBuilder) { 3302 OMPBuilder->CreateTaskyield(CGF.Builder); 3303 } else { 3304 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3305 llvm::Value *Args[] = { 3306 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3307 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3308 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), 3309 Args); 3310 } 3311 3312 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3313 
Region->emitUntiedSwitch(CGF); 3314 } 3315 3316 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3317 const RegionCodeGenTy &TaskgroupOpGen, 3318 SourceLocation Loc) { 3319 if (!CGF.HaveInsertPoint()) 3320 return; 3321 // __kmpc_taskgroup(ident_t *, gtid); 3322 // TaskgroupOpGen(); 3323 // __kmpc_end_taskgroup(ident_t *, gtid); 3324 // Prepare arguments and build a call to __kmpc_taskgroup 3325 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3326 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3327 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3328 Args); 3329 TaskgroupOpGen.setAction(Action); 3330 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3331 } 3332 3333 /// Given an array of pointers to variables, project the address of a 3334 /// given variable. 3335 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3336 unsigned Index, const VarDecl *Var) { 3337 // Pull out the pointer to the variable. 
3338 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3339 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3340 3341 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3342 Addr = CGF.Builder.CreateElementBitCast( 3343 Addr, CGF.ConvertTypeForMem(Var->getType())); 3344 return Addr; 3345 } 3346 3347 static llvm::Value *emitCopyprivateCopyFunction( 3348 CodeGenModule &CGM, llvm::Type *ArgsType, 3349 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3350 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3351 SourceLocation Loc) { 3352 ASTContext &C = CGM.getContext(); 3353 // void copy_func(void *LHSArg, void *RHSArg); 3354 FunctionArgList Args; 3355 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3356 ImplicitParamDecl::Other); 3357 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3358 ImplicitParamDecl::Other); 3359 Args.push_back(&LHSArg); 3360 Args.push_back(&RHSArg); 3361 const auto &CGFI = 3362 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3363 std::string Name = 3364 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3365 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3366 llvm::GlobalValue::InternalLinkage, Name, 3367 &CGM.getModule()); 3368 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3369 Fn->setDoesNotRecurse(); 3370 CodeGenFunction CGF(CGM); 3371 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3372 // Dest = (void*[n])(LHSArg); 3373 // Src = (void*[n])(RHSArg); 3374 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3375 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3376 ArgsType), CGF.getPointerAlign()); 3377 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3378 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3379 ArgsType), CGF.getPointerAlign()); 3380 // *(Type0*)Dst[0] = 
*(Type0*)Src[0]; 3381 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3382 // ... 3383 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3384 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3385 const auto *DestVar = 3386 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3387 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3388 3389 const auto *SrcVar = 3390 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3391 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3392 3393 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3394 QualType Type = VD->getType(); 3395 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3396 } 3397 CGF.FinishFunction(); 3398 return Fn; 3399 } 3400 3401 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3402 const RegionCodeGenTy &SingleOpGen, 3403 SourceLocation Loc, 3404 ArrayRef<const Expr *> CopyprivateVars, 3405 ArrayRef<const Expr *> SrcExprs, 3406 ArrayRef<const Expr *> DstExprs, 3407 ArrayRef<const Expr *> AssignmentOps) { 3408 if (!CGF.HaveInsertPoint()) 3409 return; 3410 assert(CopyprivateVars.size() == SrcExprs.size() && 3411 CopyprivateVars.size() == DstExprs.size() && 3412 CopyprivateVars.size() == AssignmentOps.size()); 3413 ASTContext &C = CGM.getContext(); 3414 // int32 did_it = 0; 3415 // if(__kmpc_single(ident_t *, gtid)) { 3416 // SingleOpGen(); 3417 // __kmpc_end_single(ident_t *, gtid); 3418 // did_it = 1; 3419 // } 3420 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3421 // <copy_func>, did_it); 3422 3423 Address DidIt = Address::invalid(); 3424 if (!CopyprivateVars.empty()) { 3425 // int32 did_it = 0; 3426 QualType KmpInt32Ty = 3427 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3428 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3429 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3430 } 3431 // Prepare arguments and build a call to __kmpc_single 3432 llvm::Value 
*Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3433 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3434 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3435 /*Conditional=*/true); 3436 SingleOpGen.setAction(Action); 3437 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3438 if (DidIt.isValid()) { 3439 // did_it = 1; 3440 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3441 } 3442 Action.Done(CGF); 3443 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3444 // <copy_func>, did_it); 3445 if (DidIt.isValid()) { 3446 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3447 QualType CopyprivateArrayTy = C.getConstantArrayType( 3448 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 3449 /*IndexTypeQuals=*/0); 3450 // Create a list of all private variables for copyprivate. 3451 Address CopyprivateList = 3452 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3453 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3454 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3455 CGF.Builder.CreateStore( 3456 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3457 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 3458 CGF.VoidPtrTy), 3459 Elem); 3460 } 3461 // Build function that copies private values from single region to all other 3462 // threads in the corresponding parallel region. 
3463 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3464 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3465 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3466 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3467 Address CL = 3468 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3469 CGF.VoidPtrTy); 3470 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3471 llvm::Value *Args[] = { 3472 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3473 getThreadID(CGF, Loc), // i32 <gtid> 3474 BufSize, // size_t <buf_size> 3475 CL.getPointer(), // void *<copyprivate list> 3476 CpyFn, // void (*) (void *, void *) <copy_func> 3477 DidItVal // i32 did_it 3478 }; 3479 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3480 } 3481 } 3482 3483 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3484 const RegionCodeGenTy &OrderedOpGen, 3485 SourceLocation Loc, bool IsThreads) { 3486 if (!CGF.HaveInsertPoint()) 3487 return; 3488 // __kmpc_ordered(ident_t *, gtid); 3489 // OrderedOpGen(); 3490 // __kmpc_end_ordered(ident_t *, gtid); 3491 // Prepare arguments and build a call to __kmpc_ordered 3492 if (IsThreads) { 3493 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3494 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3495 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3496 Args); 3497 OrderedOpGen.setAction(Action); 3498 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3499 return; 3500 } 3501 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3502 } 3503 3504 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3505 unsigned Flags; 3506 if (Kind == OMPD_for) 3507 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3508 else if (Kind == OMPD_sections) 3509 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3510 else if (Kind == OMPD_single) 3511 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3512 else if (Kind == OMPD_barrier) 
3513 Flags = OMP_IDENT_BARRIER_EXPL; 3514 else 3515 Flags = OMP_IDENT_BARRIER_IMPL; 3516 return Flags; 3517 } 3518 3519 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3520 CodeGenFunction &CGF, const OMPLoopDirective &S, 3521 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3522 // Check if the loop directive is actually a doacross loop directive. In this 3523 // case choose static, 1 schedule. 3524 if (llvm::any_of( 3525 S.getClausesOfKind<OMPOrderedClause>(), 3526 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3527 ScheduleKind = OMPC_SCHEDULE_static; 3528 // Chunk size is 1 in this case. 3529 llvm::APInt ChunkSize(32, 1); 3530 ChunkExpr = IntegerLiteral::Create( 3531 CGF.getContext(), ChunkSize, 3532 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3533 SourceLocation()); 3534 } 3535 } 3536 3537 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3538 OpenMPDirectiveKind Kind, bool EmitChecks, 3539 bool ForceSimpleCall) { 3540 // Check if we should use the OMPBuilder 3541 auto *OMPRegionInfo = 3542 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 3543 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 3544 if (OMPBuilder) { 3545 CGF.Builder.restoreIP(OMPBuilder->CreateBarrier( 3546 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 3547 return; 3548 } 3549 3550 if (!CGF.HaveInsertPoint()) 3551 return; 3552 // Build call __kmpc_cancel_barrier(loc, thread_id); 3553 // Build call __kmpc_barrier(loc, thread_id); 3554 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3555 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3556 // thread_id); 3557 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3558 getThreadID(CGF, Loc)}; 3559 if (OMPRegionInfo) { 3560 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3561 llvm::Value *Result = CGF.EmitRuntimeCall( 3562 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3563 if 
(EmitChecks) { 3564 // if (__kmpc_cancel_barrier()) { 3565 // exit from construct; 3566 // } 3567 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3568 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3569 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3570 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3571 CGF.EmitBlock(ExitBB); 3572 // exit from construct; 3573 CodeGenFunction::JumpDest CancelDestination = 3574 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3575 CGF.EmitBranchThroughCleanup(CancelDestination); 3576 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3577 } 3578 return; 3579 } 3580 } 3581 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3582 } 3583 3584 /// Map the OpenMP loop schedule to the runtime enumeration. 3585 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3586 bool Chunked, bool Ordered) { 3587 switch (ScheduleKind) { 3588 case OMPC_SCHEDULE_static: 3589 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3590 : (Ordered ? OMP_ord_static : OMP_sch_static); 3591 case OMPC_SCHEDULE_dynamic: 3592 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3593 case OMPC_SCHEDULE_guided: 3594 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3595 case OMPC_SCHEDULE_runtime: 3596 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3597 case OMPC_SCHEDULE_auto: 3598 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3599 case OMPC_SCHEDULE_unknown: 3600 assert(!Chunked && "chunk was specified but schedule kind not known"); 3601 return Ordered ? OMP_ord_static : OMP_sch_static; 3602 } 3603 llvm_unreachable("Unexpected runtime schedule"); 3604 } 3605 3606 /// Map the OpenMP distribute schedule to the runtime enumeration. 
3607 static OpenMPSchedType 3608 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3609 // only static is allowed for dist_schedule 3610 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3611 } 3612 3613 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3614 bool Chunked) const { 3615 OpenMPSchedType Schedule = 3616 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3617 return Schedule == OMP_sch_static; 3618 } 3619 3620 bool CGOpenMPRuntime::isStaticNonchunked( 3621 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3622 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3623 return Schedule == OMP_dist_sch_static; 3624 } 3625 3626 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3627 bool Chunked) const { 3628 OpenMPSchedType Schedule = 3629 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3630 return Schedule == OMP_sch_static_chunked; 3631 } 3632 3633 bool CGOpenMPRuntime::isStaticChunked( 3634 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3635 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3636 return Schedule == OMP_dist_sch_static_chunked; 3637 } 3638 3639 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3640 OpenMPSchedType Schedule = 3641 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3642 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3643 return Schedule != OMP_sch_static; 3644 } 3645 3646 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 3647 OpenMPScheduleClauseModifier M1, 3648 OpenMPScheduleClauseModifier M2) { 3649 int Modifier = 0; 3650 switch (M1) { 3651 case OMPC_SCHEDULE_MODIFIER_monotonic: 3652 Modifier = OMP_sch_modifier_monotonic; 3653 break; 3654 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3655 Modifier = OMP_sch_modifier_nonmonotonic; 
3656 break; 3657 case OMPC_SCHEDULE_MODIFIER_simd: 3658 if (Schedule == OMP_sch_static_chunked) 3659 Schedule = OMP_sch_static_balanced_chunked; 3660 break; 3661 case OMPC_SCHEDULE_MODIFIER_last: 3662 case OMPC_SCHEDULE_MODIFIER_unknown: 3663 break; 3664 } 3665 switch (M2) { 3666 case OMPC_SCHEDULE_MODIFIER_monotonic: 3667 Modifier = OMP_sch_modifier_monotonic; 3668 break; 3669 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3670 Modifier = OMP_sch_modifier_nonmonotonic; 3671 break; 3672 case OMPC_SCHEDULE_MODIFIER_simd: 3673 if (Schedule == OMP_sch_static_chunked) 3674 Schedule = OMP_sch_static_balanced_chunked; 3675 break; 3676 case OMPC_SCHEDULE_MODIFIER_last: 3677 case OMPC_SCHEDULE_MODIFIER_unknown: 3678 break; 3679 } 3680 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 3681 // If the static schedule kind is specified or if the ordered clause is 3682 // specified, and if the nonmonotonic modifier is not specified, the effect is 3683 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 3684 // modifier is specified, the effect is as if the nonmonotonic modifier is 3685 // specified. 
3686 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 3687 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 3688 Schedule == OMP_sch_static_balanced_chunked || 3689 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 3690 Schedule == OMP_dist_sch_static_chunked || 3691 Schedule == OMP_dist_sch_static)) 3692 Modifier = OMP_sch_modifier_nonmonotonic; 3693 } 3694 return Schedule | Modifier; 3695 } 3696 3697 void CGOpenMPRuntime::emitForDispatchInit( 3698 CodeGenFunction &CGF, SourceLocation Loc, 3699 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3700 bool Ordered, const DispatchRTInput &DispatchValues) { 3701 if (!CGF.HaveInsertPoint()) 3702 return; 3703 OpenMPSchedType Schedule = getRuntimeSchedule( 3704 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3705 assert(Ordered || 3706 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3707 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3708 Schedule != OMP_sch_static_balanced_chunked)); 3709 // Call __kmpc_dispatch_init( 3710 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3711 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3712 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3713 3714 // If the Chunk was not specified in the clause - use default value 1. 3715 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3716 : CGF.Builder.getIntN(IVSize, 1); 3717 llvm::Value *Args[] = { 3718 emitUpdateLocation(CGF, Loc), 3719 getThreadID(CGF, Loc), 3720 CGF.Builder.getInt32(addMonoNonMonoModifier( 3721 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3722 DispatchValues.LB, // Lower 3723 DispatchValues.UB, // Upper 3724 CGF.Builder.getIntN(IVSize, 1), // Stride 3725 Chunk // Chunk 3726 }; 3727 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3728 } 3729 3730 static void emitForStaticInitCall( 3731 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3732 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3733 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3734 const CGOpenMPRuntime::StaticRTInput &Values) { 3735 if (!CGF.HaveInsertPoint()) 3736 return; 3737 3738 assert(!Values.Ordered); 3739 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3740 Schedule == OMP_sch_static_balanced_chunked || 3741 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3742 Schedule == OMP_dist_sch_static || 3743 Schedule == OMP_dist_sch_static_chunked); 3744 3745 // Call __kmpc_for_static_init( 3746 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3747 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3748 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3749 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3750 llvm::Value *Chunk = Values.Chunk; 3751 if (Chunk == nullptr) { 3752 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3753 Schedule == OMP_dist_sch_static) && 3754 "expected static non-chunked schedule"); 3755 // If the Chunk was not specified in the clause - use default value 1. 
3756 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3757 } else { 3758 assert((Schedule == OMP_sch_static_chunked || 3759 Schedule == OMP_sch_static_balanced_chunked || 3760 Schedule == OMP_ord_static_chunked || 3761 Schedule == OMP_dist_sch_static_chunked) && 3762 "expected static chunked schedule"); 3763 } 3764 llvm::Value *Args[] = { 3765 UpdateLocation, 3766 ThreadId, 3767 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 3768 M2)), // Schedule type 3769 Values.IL.getPointer(), // &isLastIter 3770 Values.LB.getPointer(), // &LB 3771 Values.UB.getPointer(), // &UB 3772 Values.ST.getPointer(), // &Stride 3773 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3774 Chunk // Chunk 3775 }; 3776 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3777 } 3778 3779 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3780 SourceLocation Loc, 3781 OpenMPDirectiveKind DKind, 3782 const OpenMPScheduleTy &ScheduleKind, 3783 const StaticRTInput &Values) { 3784 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3785 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3786 assert(isOpenMPWorksharingDirective(DKind) && 3787 "Expected loop-based or sections-based directive."); 3788 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3789 isOpenMPLoopDirective(DKind) 3790 ? 
OMP_IDENT_WORK_LOOP 3791 : OMP_IDENT_WORK_SECTIONS); 3792 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3793 llvm::FunctionCallee StaticInitFunction = 3794 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3795 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 3796 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3797 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3798 } 3799 3800 void CGOpenMPRuntime::emitDistributeStaticInit( 3801 CodeGenFunction &CGF, SourceLocation Loc, 3802 OpenMPDistScheduleClauseKind SchedKind, 3803 const CGOpenMPRuntime::StaticRTInput &Values) { 3804 OpenMPSchedType ScheduleNum = 3805 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3806 llvm::Value *UpdatedLocation = 3807 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3808 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3809 llvm::FunctionCallee StaticInitFunction = 3810 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3811 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3812 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3813 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3814 } 3815 3816 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3817 SourceLocation Loc, 3818 OpenMPDirectiveKind DKind) { 3819 if (!CGF.HaveInsertPoint()) 3820 return; 3821 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3822 llvm::Value *Args[] = { 3823 emitUpdateLocation(CGF, Loc, 3824 isOpenMPDistributeDirective(DKind) 3825 ? OMP_IDENT_WORK_DISTRIBUTE 3826 : isOpenMPLoopDirective(DKind) 3827 ? 
OMP_IDENT_WORK_LOOP
                             : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// Emit the runtime call that ends an iteration of an ordered, dynamically
/// dispatched loop. The callee is picked by createDispatchFiniFunction() from
/// the induction variable size and signedness. Nothing is emitted when \p CGF
/// has no insertion point.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit a call to the dispatch-next runtime function selected by
/// \p IVSize / \p IVSigned, passing the addresses \p IL, \p LB, \p UB and
/// \p ST as output pointers.
/// \return The call result converted from a signed 32-bit integer to bool.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns a 32-bit integer; callers of this function get a bool.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit a call to __kmpc_push_num_threads with \p NumThreads cast (as a
/// signed value) to a 32-bit integer. Nothing is emitted when \p CGF has no
/// insertion point.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// Emit a call to __kmpc_push_proc_bind with \p ProcBind passed as an integer
/// constant. \p ProcBind must not be OMP_PROC_BIND_unknown (asserted). Nothing
/// is emitted when \p CGF has no insertion point.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// Emit a flush. When the module has an OpenMPIRBuilder the flush is created
/// through it; otherwise a call to __kmpc_flush is emitted, provided \p CGF
/// has an insertion point. The unnamed expression list and \p AO are not
/// referenced by this implementation.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
/// The enumerator order mirrors the order in which createKmpTaskTRecordDecl()
/// adds fields, so each value can be used as an offset from field_begin().
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// \return true when neither target region entries nor device global variable
/// entries have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
/// Stores a placeholder entry (null address and ID) with the given \p Order
/// under the (DeviceID, FileID, ParentName, LineNum) key and bumps the entry
/// counter. Only valid during device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register the address, ID and flags of a target region entry.
/// On the device the entry must already have been initialized (and not yet
/// registered); otherwise an error diagnostic is reported and nothing is
/// changed. On the host a new entry is created with the next available order
/// number.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// \return true only when an entry exists for the given key AND it has
/// neither an address nor an ID yet, i.e. it is initialized but not
/// registered. A missing key at any nesting level yields false.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

/// Invoke \p Action on every recorded target region entry, passing the
/// device ID, file ID, parent function name, line number and the entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Initialize a device global variable entry named \p Name with the given
/// \p Flags and \p Order, and bump the entry counter. try_emplace leaves an
/// already existing entry with the same name untouched. Only valid during
/// device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register address, size and linkage of a declare target global variable.
/// On the device the entry is expected to exist already (asserted); on the
/// host a new entry is created unless one is already present. In both modes,
/// an entry that is already registered only has its size and linkage filled
/// in, and only if the recorded size is still zero.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Note: operator[] creates a default entry if VarName is not present.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // Address already set: only complete a previously unknown size/linkage.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // First registration on the host: the order is the running entry count.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action on every recorded device global variable entry, passing
/// the variable name and the entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Create one __tgt_offload_entry global for the entity \p Addr.
/// The entry records \p ID (as void*), a pointer to an internal constant
/// string holding the name of \p Addr, \p Size, \p Flags and a zero reserved
/// field, and is placed in the "omp_offloading_entries" section.
/// \p Linkage is not referenced in this body; the entry global is created
/// with WeakAnyLinkage.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Initializer fields, in the order of struct __tgt_offload_entry:
  // addr, name, size, flags, reserved.
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  Entry->setSection("omp_offloading_entries");
}

/// Emit the "omp_offload.info" named metadata describing every recorded
/// offload entry and create the corresponding offload entry globals, in the
/// order the entries were created. Does nothing in simd-only mode or when no
/// entries were recorded.
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Both arrays are indexed by the entry's order number.
  SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
                         SourceLocation, StringRef>,
              16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry;
  auto &&TargetRegionMetadataEmitter =
      [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
       &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID),      GetMDString(ParentName),
                                 GetMDInt(Line),        GetMDInt(E.getOrder())};

        // Recover a source location for diagnostics by finding the file with
        // the matching unique (device, file) ID. Loc stays invalid if none
        // matches. Note that the loop-local 'E' (end iterator) shadows the
        // entry parameter 'E' only inside the for statement.
        SourceLocation Loc;
        for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                  E = CGM.getContext().getSourceManager().fileinfo_end();
             I != E; ++I) {
          if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
              I->getFirst()->getUniqueID().getFile() == FileID) {
            Loc = CGM.getContext().getSourceManager().translateFileLineCol(
                I->getFirst(), Line, 1);
            break;
          }
        }
        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry;
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] =
            std::make_tuple(&E, SourceLocation(), MangledName);

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk the entries in creation order and emit an offload entry for each
  // one that is complete; incomplete ones are diagnosed or skipped.
  for (const auto &E : OrderedEntries) {
    assert(std::get<0>(E) && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                std::get<0>(E))) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region in %0 is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
                                             OffloadEntryInfoDeviceGlobalVar>(
                   std::get<0>(E))) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // No entry is emitted on the device when unified shared memory is
        // required.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error, "Offloading entry for declare target "
                                        "variable %0 is incorrect: the "
                                        "address is invalid.");
          CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // The address must be unset on the device and set on the host.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the address doubles as the entry ID.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
/// Only acts during device compilation with a non-empty host IR file name.
/// Failure to open or parse the file is reported as a diagnostic; a module
/// without "omp_offload.info" metadata is silently accepted.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // The host module is parsed into its own, function-local LLVM context.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Accessors for operand Idx of the current node as integer / string.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands depend on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily build the kmp_routine_entry_t function pointer type and cache both
/// its QualType (KmpRoutineEntryPtrQTy) and its converted type
/// (KmpRoutineEntryPtrTy). Subsequent calls are no-ops.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// \return The record type of struct __tgt_offload_entry, building the
/// implicit (packed) record on first use and caching it in
/// TgtOffloadEntryQTy.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it is a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

namespace {
/// The three declarations that describe one privatized variable of a task:
/// the original variable, its private copy, and the (possibly null) helper
/// variable used to initialize the private copy element-wise.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
/// A privatized variable paired with its alignment.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Build the implicit record ".kmp_privates.t" with one field per entry of
/// \p Privates, typed as the non-reference type of the original variable and
/// carrying over its 'aligned' attributes.
/// \return The record, or nullptr when \p Privates is empty.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /* private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Copy only the AlignedAttr attributes onto the new field.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Build the implicit record kmp_task_t (and the union kmp_cmplrdata_t used
/// for its data1/data2 fields). The five taskloop-only fields are appended
/// only when \p Kind is a taskloop directive. The field order must stay in
/// sync with KmpTaskTFields.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a kmp_int32 and a routine entry pointer.
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Build the implicit record kmp_task_t_with_privates: the kmp_task_t data
/// followed by the privates record. The second field is omitted when
/// \p Privates is empty.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
/// The part id is passed by address, the shareds pointer is loaded and cast
/// to \p SharedsPtrTy, and a null privates pointer is passed when the task
/// record has no privates field.
/// \return The emitted internal-linkage proxy function.
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  // TDBase is the whole kmp_task_t_with_privates; Base is its first field,
  // the kmp_task_t data.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is the optional second field of the wrapper record.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, stride, last-iteration flag and
    // reductions, each loaded from the corresponding kmp_task_t field.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit an internal function (kmp_int32 gtid, kmp_task_t_with_privates *tt)
/// that pushes a destroy cleanup for every field of the task's privates
/// record whose type needs destruction.
/// The privates record is taken to be the second field of
/// \p KmpTaskTWithPrivatesQTy; this function does not check that the field
/// exists.
/// \return The emitted destructor function.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Step to the second field (the privates record) and re-base on it.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
/// The output parameters are declared in the order private, firstprivate,
/// lastprivate variables; the fields of \p PrivatesQTy are matched to them
/// through the original variable of the corresponding \p Privates element.
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Maps each original variable to the index of its output parameter in
  // Args. Counting starts at 1 because Args[0] is the privates pointer.
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // When optimizing, force the mapping function to be inlined.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  // Counter now indexes Privates, walked in step with the record's fields.
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
4817 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4818 // Perform simple memcpy. 4819 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4820 } else { 4821 // Initialize firstprivate array using element-by-element 4822 // initialization. 4823 CGF.EmitOMPAggregateAssign( 4824 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 4825 Type, 4826 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4827 Address SrcElement) { 4828 // Clean up any temporaries needed by the initialization. 4829 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4830 InitScope.addPrivate( 4831 Elem, [SrcElement]() -> Address { return SrcElement; }); 4832 (void)InitScope.Privatize(); 4833 // Emit initialization for single element. 4834 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4835 CGF, &CapturesInfo); 4836 CGF.EmitAnyExprToMem(Init, DestElement, 4837 Init->getType().getQualifiers(), 4838 /*IsInitializer=*/false); 4839 }); 4840 } 4841 } else { 4842 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4843 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 4844 return SharedRefLValue.getAddress(CGF); 4845 }); 4846 (void)InitScope.Privatize(); 4847 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4848 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4849 /*capturedByInit=*/false); 4850 } 4851 } else { 4852 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4853 } 4854 } 4855 ++FI; 4856 } 4857 } 4858 4859 /// Check if duplication function is required for taskloops. 
4860 static bool checkInitIsRequired(CodeGenFunction &CGF, 4861 ArrayRef<PrivateDataTy> Privates) { 4862 bool InitRequired = false; 4863 for (const PrivateDataTy &Pair : Privates) { 4864 const VarDecl *VD = Pair.second.PrivateCopy; 4865 const Expr *Init = VD->getAnyInitializer(); 4866 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4867 !CGF.isTrivialInitializer(Init)); 4868 if (InitRequired) 4869 break; 4870 } 4871 return InitRequired; 4872 } 4873 4874 4875 /// Emit task_dup function (for initialization of 4876 /// private/firstprivate/lastprivate vars and last_iter flag) 4877 /// \code 4878 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4879 /// lastpriv) { 4880 /// // setup lastprivate flag 4881 /// task_dst->last = lastpriv; 4882 /// // could be constructor calls here... 4883 /// } 4884 /// \endcode 4885 static llvm::Value * 4886 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4887 const OMPExecutableDirective &D, 4888 QualType KmpTaskTWithPrivatesPtrQTy, 4889 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4890 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4891 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4892 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4893 ASTContext &C = CGM.getContext(); 4894 FunctionArgList Args; 4895 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4896 KmpTaskTWithPrivatesPtrQTy, 4897 ImplicitParamDecl::Other); 4898 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4899 KmpTaskTWithPrivatesPtrQTy, 4900 ImplicitParamDecl::Other); 4901 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4902 ImplicitParamDecl::Other); 4903 Args.push_back(&DstArg); 4904 Args.push_back(&SrcArg); 4905 Args.push_back(&LastprivArg); 4906 const auto &TaskDupFnInfo = 4907 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4908 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4909 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4910 auto *TaskDup = llvm::Function::Create( 4911 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4912 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4913 TaskDup->setDoesNotRecurse(); 4914 CodeGenFunction CGF(CGM); 4915 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4916 Loc); 4917 4918 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4919 CGF.GetAddrOfLocalVar(&DstArg), 4920 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4921 // task_dst->liter = lastpriv; 4922 if (WithLastIter) { 4923 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4924 LValue Base = CGF.EmitLValueForField( 4925 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4926 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4927 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4928 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4929 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4930 } 4931 4932 // Emit initial values for private copies (if any). 4933 assert(!Privates.empty()); 4934 Address KmpTaskSharedsPtr = Address::invalid(); 4935 if (!Data.FirstprivateVars.empty()) { 4936 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4937 CGF.GetAddrOfLocalVar(&SrcArg), 4938 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4939 LValue Base = CGF.EmitLValueForField( 4940 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4941 KmpTaskSharedsPtr = Address( 4942 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4943 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4944 KmpTaskTShareds)), 4945 Loc), 4946 CGF.getNaturalTypeAlignment(SharedsTy)); 4947 } 4948 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4949 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4950 CGF.FinishFunction(); 4951 return TaskDup; 4952 } 4953 4954 /// Checks if destructor function is required to be generated. 
4955 /// \return true if cleanups are required, false otherwise. 4956 static bool 4957 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4958 bool NeedsCleanup = false; 4959 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4960 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4961 for (const FieldDecl *FD : PrivateRD->fields()) { 4962 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4963 if (NeedsCleanup) 4964 break; 4965 } 4966 return NeedsCleanup; 4967 } 4968 4969 CGOpenMPRuntime::TaskResultTy 4970 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4971 const OMPExecutableDirective &D, 4972 llvm::Function *TaskFunction, QualType SharedsTy, 4973 Address Shareds, const OMPTaskDataTy &Data) { 4974 ASTContext &C = CGM.getContext(); 4975 llvm::SmallVector<PrivateDataTy, 4> Privates; 4976 // Aggregate privates and sort them by the alignment. 4977 auto I = Data.PrivateCopies.begin(); 4978 for (const Expr *E : Data.PrivateVars) { 4979 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4980 Privates.emplace_back( 4981 C.getDeclAlign(VD), 4982 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4983 /*PrivateElemInit=*/nullptr)); 4984 ++I; 4985 } 4986 I = Data.FirstprivateCopies.begin(); 4987 auto IElemInitRef = Data.FirstprivateInits.begin(); 4988 for (const Expr *E : Data.FirstprivateVars) { 4989 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4990 Privates.emplace_back( 4991 C.getDeclAlign(VD), 4992 PrivateHelpersTy( 4993 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4994 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4995 ++I; 4996 ++IElemInitRef; 4997 } 4998 I = Data.LastprivateCopies.begin(); 4999 for (const Expr *E : Data.LastprivateVars) { 5000 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5001 Privates.emplace_back( 5002 C.getDeclAlign(VD), 5003 PrivateHelpersTy(VD, 
cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5004 /*PrivateElemInit=*/nullptr)); 5005 ++I; 5006 } 5007 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 5008 return L.first > R.first; 5009 }); 5010 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 5011 // Build type kmp_routine_entry_t (if not built yet). 5012 emitKmpRoutineEntryT(KmpInt32Ty); 5013 // Build type kmp_task_t (if not built yet). 5014 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 5015 if (SavedKmpTaskloopTQTy.isNull()) { 5016 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5017 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5018 } 5019 KmpTaskTQTy = SavedKmpTaskloopTQTy; 5020 } else { 5021 assert((D.getDirectiveKind() == OMPD_task || 5022 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 5023 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 5024 "Expected taskloop, task or target directive"); 5025 if (SavedKmpTaskTQTy.isNull()) { 5026 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5027 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5028 } 5029 KmpTaskTQTy = SavedKmpTaskTQTy; 5030 } 5031 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5032 // Build particular struct kmp_task_t for the given task. 
5033 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5034 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5035 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5036 QualType KmpTaskTWithPrivatesPtrQTy = 5037 C.getPointerType(KmpTaskTWithPrivatesQTy); 5038 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5039 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5040 KmpTaskTWithPrivatesTy->getPointerTo(); 5041 llvm::Value *KmpTaskTWithPrivatesTySize = 5042 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5043 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5044 5045 // Emit initial values for private copies (if any). 5046 llvm::Value *TaskPrivatesMap = nullptr; 5047 llvm::Type *TaskPrivatesMapTy = 5048 std::next(TaskFunction->arg_begin(), 3)->getType(); 5049 if (!Privates.empty()) { 5050 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5051 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5052 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5053 FI->getType(), Privates); 5054 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5055 TaskPrivatesMap, TaskPrivatesMapTy); 5056 } else { 5057 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5058 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5059 } 5060 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5061 // kmp_task_t *tt); 5062 llvm::Function *TaskEntry = emitProxyTaskFunction( 5063 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5064 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5065 TaskPrivatesMap); 5066 5067 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5068 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5069 // kmp_routine_entry_t *task_entry); 5070 // Task flags. 
Format is taken from 5071 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5072 // description of kmp_tasking_flags struct. 5073 enum { 5074 TiedFlag = 0x1, 5075 FinalFlag = 0x2, 5076 DestructorsFlag = 0x8, 5077 PriorityFlag = 0x20 5078 }; 5079 unsigned Flags = Data.Tied ? TiedFlag : 0; 5080 bool NeedsCleanup = false; 5081 if (!Privates.empty()) { 5082 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5083 if (NeedsCleanup) 5084 Flags = Flags | DestructorsFlag; 5085 } 5086 if (Data.Priority.getInt()) 5087 Flags = Flags | PriorityFlag; 5088 llvm::Value *TaskFlags = 5089 Data.Final.getPointer() 5090 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 5091 CGF.Builder.getInt32(FinalFlag), 5092 CGF.Builder.getInt32(/*C=*/0)) 5093 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5094 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5095 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5096 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 5097 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 5098 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5099 TaskEntry, KmpRoutineEntryPtrTy)}; 5100 llvm::Value *NewTask; 5101 if (D.hasClausesOfKind<OMPNowaitClause>()) { 5102 // Check if we have any device clause associated with the directive. 5103 const Expr *Device = nullptr; 5104 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 5105 Device = C->getDevice(); 5106 // Emit device ID if any otherwise use default value. 
5107 llvm::Value *DeviceID; 5108 if (Device) 5109 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5110 CGF.Int64Ty, /*isSigned=*/true); 5111 else 5112 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 5113 AllocArgs.push_back(DeviceID); 5114 NewTask = CGF.EmitRuntimeCall( 5115 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); 5116 } else { 5117 NewTask = CGF.EmitRuntimeCall( 5118 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5119 } 5120 llvm::Value *NewTaskNewTaskTTy = 5121 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5122 NewTask, KmpTaskTWithPrivatesPtrTy); 5123 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5124 KmpTaskTWithPrivatesQTy); 5125 LValue TDBase = 5126 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5127 // Fill the data in the resulting kmp_task_t record. 5128 // Copy shareds if there are any. 5129 Address KmpTaskSharedsPtr = Address::invalid(); 5130 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5131 KmpTaskSharedsPtr = 5132 Address(CGF.EmitLoadOfScalar( 5133 CGF.EmitLValueForField( 5134 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5135 KmpTaskTShareds)), 5136 Loc), 5137 CGF.getNaturalTypeAlignment(SharedsTy)); 5138 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5139 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5140 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5141 } 5142 // Emit initial values for private copies (if any). 
5143 TaskResultTy Result; 5144 if (!Privates.empty()) { 5145 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5146 SharedsTy, SharedsPtrTy, Data, Privates, 5147 /*ForDup=*/false); 5148 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5149 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5150 Result.TaskDupFn = emitTaskDupFunction( 5151 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5152 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5153 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5154 } 5155 } 5156 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5157 enum { Priority = 0, Destructors = 1 }; 5158 // Provide pointer to function with destructors for privates. 5159 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5160 const RecordDecl *KmpCmplrdataUD = 5161 (*FI)->getType()->getAsUnionType()->getDecl(); 5162 if (NeedsCleanup) { 5163 llvm::Value *DestructorFn = emitDestructorsFunction( 5164 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5165 KmpTaskTWithPrivatesQTy); 5166 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5167 LValue DestructorsLV = CGF.EmitLValueForField( 5168 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5169 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5170 DestructorFn, KmpRoutineEntryPtrTy), 5171 DestructorsLV); 5172 } 5173 // Set priority. 
5174 if (Data.Priority.getInt()) { 5175 LValue Data2LV = CGF.EmitLValueForField( 5176 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5177 LValue PriorityLV = CGF.EmitLValueForField( 5178 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5179 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5180 } 5181 Result.NewTask = NewTask; 5182 Result.TaskEntry = TaskEntry; 5183 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5184 Result.TDBase = TDBase; 5185 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5186 return Result; 5187 } 5188 5189 namespace { 5190 /// Dependence kind for RTL. 5191 enum RTLDependenceKindTy { 5192 DepIn = 0x01, 5193 DepInOut = 0x3, 5194 DepMutexInOutSet = 0x4 5195 }; 5196 /// Fields ids in kmp_depend_info record. 5197 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5198 } // namespace 5199 5200 /// Translates internal dependency kind into the runtime kind. 5201 static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { 5202 RTLDependenceKindTy DepKind; 5203 switch (K) { 5204 case OMPC_DEPEND_in: 5205 DepKind = DepIn; 5206 break; 5207 // Out and InOut dependencies must use the same code. 5208 case OMPC_DEPEND_out: 5209 case OMPC_DEPEND_inout: 5210 DepKind = DepInOut; 5211 break; 5212 case OMPC_DEPEND_mutexinoutset: 5213 DepKind = DepMutexInOutSet; 5214 break; 5215 case OMPC_DEPEND_source: 5216 case OMPC_DEPEND_sink: 5217 case OMPC_DEPEND_depobj: 5218 case OMPC_DEPEND_unknown: 5219 llvm_unreachable("Unknown task dependence type"); 5220 } 5221 return DepKind; 5222 } 5223 5224 /// Builds kmp_depend_info, if it is not built yet, and builds flags type. 
///
/// \param C AST context used to create the types.
/// \param KmpDependInfoTy [in,out] The kmp_depend_info record type. It is
/// built only when null on entry, with three fields in this order: an
/// intptr-typed field, a size_t-typed field and a field of type \p FlagsTy.
/// \param FlagsTy [out] Unsigned integer type with the bit width of 'bool'.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

/// Loads the dependence array referenced by a depobj variable.
///
/// The pointer stored in \p DepobjLVal is loaded and treated as a pointer to
/// kmp_depend_info elements. The number of elements is read from the
/// base_addr field of the element located immediately before that pointer.
///
/// \return A pair of the loaded element count and an lvalue for the first
/// kmp_depend_info element.
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the pointer held by the depobj variable and retype it as
  // kmp_depend_info *.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element: deps[-1] holds the number of dependencies.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[-1].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

/// Emits the array of kmp_depend_info records for a list of dependencies.
///
/// Three storage strategies are used:
///  - \p ForDepobj: the array is allocated with __kmpc_alloc and has one
///    extra leading element whose base_addr field stores the number of
///    dependencies;
///  - the list contains depobj dependencies: a variable-sized local array is
///    emitted, the regular dependencies are stored first and the contents of
///    the referenced depobj arrays are copied after them;
///  - otherwise: a constant-sized temporary array is used.
///
/// \return A pair of the number of elements (null when \p Dependencies is
/// empty) and the address of the first dependence record cast to void*
/// (invalid when \p Dependencies is empty).
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF,
    ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependencies,
    bool ForDepobj, SourceLocation Loc) {
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.size();
  llvm::Value *NumOfElements = nullptr;
  if (NumDependencies) {
    QualType FlagsTy;
    getDependTypes(C, KmpDependInfoTy, FlagsTy);
    RecordDecl *KmpDependInfoRD =
        cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    unsigned NumDepobjDependecies = 0;
    SmallVector<std::pair<llvm::Value *, LValue>, 4> Depobjs;
    llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
    // Calculate number of depobj dependencies and the total number of
    // elements stored in the referenced depobj arrays.
    for (const std::pair<OpenMPDependClauseKind, const Expr *> &Pair :
         Dependencies) {
      if (Pair.first != OMPC_DEPEND_depobj)
        continue;
      LValue DepobjLVal = CGF.EmitLValue(Pair.second);
      llvm::Value *NumDeps;
      LValue Base;
      std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
      NumOfDepobjElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumDeps);
      Depobjs.emplace_back(NumDeps, Base);
      ++NumDepobjDependecies;
    }

    QualType KmpDependInfoArrayTy;
    // Define type kmp_depend_info[<Dependencies.size()>];
    // For depobj reserve one extra element to store the number of elements.
    // It is required to handle depobj(x) update(in) construct.
    // kmp_depend_info[<Dependencies.size()>] deps;
    if (ForDepobj) {
      assert(NumDepobjDependecies == 0 &&
             "depobj dependency kind is not expected in depobj directive.");
      KmpDependInfoArrayTy = C.getConstantArrayType(
          KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      // Need to allocate on the dynamic memory.
      llvm::Value *ThreadID = getThreadID(CGF, Loc);
      // Use default allocator.
      llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      CharUnits Align = C.getTypeAlignInChars(KmpDependInfoArrayTy);
      CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
      llvm::Value *Size = CGF.CGM.getSize(Sz.alignTo(Align));
      llvm::Value *Args[] = {ThreadID, Size, Allocator};

      llvm::Value *Addr = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
      Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Addr, CGF.ConvertTypeForMem(KmpDependInfoArrayTy)->getPointerTo());
      DependenciesArray = Address(Addr, Align);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                             /*isSigned=*/false);
    } else if (NumDepobjDependecies > 0) {
      // Total = elements of all depobj arrays + number of regular
      // dependencies; the value is only known at run time.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          NumOfDepobjElements,
          llvm::ConstantInt::get(CGM.IntPtrTy,
                                 NumDependencies - NumDepobjDependecies,
                                 /*isSigned=*/false));
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
      OpaqueValueExpr OVE(
          Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
          VK_RValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                    RValue::get(NumOfElements));
      KmpDependInfoArrayTy =
          C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    } else {
      KmpDependInfoArrayTy = C.getConstantArrayType(
          KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
          nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
      DependenciesArray =
          CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                             /*isSigned=*/false);
    }
    if (ForDepobj) {
      // Write number of elements in the first element of array for depobj.
      llvm::Value *NumVal =
          llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0),
          KmpDependInfoTy);
      // deps[0].base_addr = NumDependencies;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(NumVal, BaseAddrLVal);
    }
    // For depobj the first slot is taken by the element count, so regular
    // records start at index 1.
    unsigned Pos = ForDepobj ? 1 : 0;
    for (unsigned I = 0; I < NumDependencies; ++I) {
      // depobj dependencies are copied as whole arrays after this loop.
      if (Dependencies[I].first == OMPC_DEPEND_depobj)
        continue;
      const Expr *E = Dependencies[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: size = (address one past the upper bound element) -
        // (address of the lower bound element).
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
            UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base;
      if (NumDepobjDependecies > 0) {
        Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateConstGEP(DependenciesArray, Pos),
            KmpDependInfoTy);
      } else {
        Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateConstArrayGEP(DependenciesArray, Pos),
            KmpDependInfoTy);
      }
      // deps[Pos].base_addr = &<Dependencies[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[Pos].len = sizeof(<Dependencies[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[Pos].flags = <Dependencies[i].first>;
      RTLDependenceKindTy DepKind =
          translateDependencyKind(Dependencies[I].first);
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
      ++Pos;
    }
    // Copy final depobj arrays.
    if (NumDepobjDependecies > 0) {
      llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
      // Start right after the regular records written above.
      Address Addr = CGF.Builder.CreateConstGEP(DependenciesArray, Pos);
      for (const std::pair<llvm::Value *, LValue> &Pair : Depobjs) {
        // Copy Pair.first elements, then advance the destination by the same
        // number of elements.
        llvm::Value *Size = CGF.Builder.CreateNUWMul(ElSize, Pair.first);
        CGF.Builder.CreateMemCpy(Addr, Pair.second.getAddress(CGF), Size);
        Addr =
            Address(CGF.Builder.CreateGEP(
                        Addr.getElementType(), Addr.getPointer(), Pair.first),
                    DependenciesArray.getAlignment().alignmentOfArrayElement(
                        C.getTypeSizeInChars(KmpDependInfoTy)));
      }
      DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          DependenciesArray, CGF.VoidPtrTy);
    } else {
      // For depobj the returned pointer skips the leading element that holds
      // the element count.
      DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, ForDepobj ? 1 : 0),
          CGF.VoidPtrTy);
    }
  }
  return std::make_pair(NumOfElements, DependenciesArray);
}

/// Emits the release of the dependence array referenced by a depobj
/// variable.
///
/// The pointer stored in \p DepobjLVal is loaded, moved back by one
/// kmp_depend_info element and passed to __kmpc_free with a null allocator.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back one element to the address passed to the free call.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
}

/// Emits a loop that stores the runtime dependence kind corresponding to
/// \p NewDepKind into the flags field of every element of the dependence
/// array referenced by \p DepobjLVal.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // End points one past the last element: Begin + NumDeps.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The loop is emitted in do-while form: the body block is entered
  // unconditionally and the exit test is done after the element update.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // The PHI selects the current element: Begin on entry, the next element on
  // the back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Leave the loop once the next element address reaches End.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, /*ForDepobj=*/false, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
5540 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5541 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5542 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5543 // list is not empty 5544 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5545 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5546 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5547 llvm::Value *DepTaskArgs[7]; 5548 if (!Data.Dependences.empty()) { 5549 DepTaskArgs[0] = UpLoc; 5550 DepTaskArgs[1] = ThreadID; 5551 DepTaskArgs[2] = NewTask; 5552 DepTaskArgs[3] = NumOfElements; 5553 DepTaskArgs[4] = DependenciesArray.getPointer(); 5554 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5555 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5556 } 5557 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, 5558 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5559 if (!Data.Tied) { 5560 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5561 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5562 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5563 } 5564 if (!Data.Dependences.empty()) { 5565 CGF.EmitRuntimeCall( 5566 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5567 } else { 5568 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5569 TaskArgs); 5570 } 5571 // Check if parent region is untied and build return for untied task; 5572 if (auto *Region = 5573 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5574 Region->emitUntiedSwitch(CGF); 5575 }; 5576 5577 llvm::Value *DepWaitTaskArgs[6]; 5578 if (!Data.Dependences.empty()) { 5579 DepWaitTaskArgs[0] = UpLoc; 5580 DepWaitTaskArgs[1] = ThreadID; 5581 DepWaitTaskArgs[2] = NumOfElements; 5582 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5583 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5584 DepWaitTaskArgs[5] = 
llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5585 } 5586 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5587 &Data, &DepWaitTaskArgs, 5588 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5589 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5590 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5591 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5592 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5593 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5594 // is specified. 5595 if (!Data.Dependences.empty()) 5596 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5597 DepWaitTaskArgs); 5598 // Call proxy_task_entry(gtid, new_task); 5599 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5600 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5601 Action.Enter(CGF); 5602 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5603 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5604 OutlinedFnArgs); 5605 }; 5606 5607 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5608 // kmp_task_t *new_task); 5609 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5610 // kmp_task_t *new_task); 5611 RegionCodeGenTy RCG(CodeGen); 5612 CommonActionTy Action( 5613 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5614 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5615 RCG.setAction(Action); 5616 RCG(CGF); 5617 }; 5618 5619 if (IfCond) { 5620 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5621 } else { 5622 RegionCodeGenTy ThenRCG(ThenCodeGen); 5623 ThenRCG(CGF); 5624 } 5625 } 5626 5627 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5628 const OMPLoopDirective &D, 5629 llvm::Function *TaskFunction, 5630 QualType SharedsTy, Address Shareds, 5631 const Expr *IfCond, 5632 const OMPTaskDataTy &Data) { 5633 if 
(!CGF.HaveInsertPoint()) 5634 return; 5635 TaskResultTy Result = 5636 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5637 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5638 // libcall. 5639 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5640 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5641 // sched, kmp_uint64 grainsize, void *task_dup); 5642 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5643 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5644 llvm::Value *IfVal; 5645 if (IfCond) { 5646 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5647 /*isSigned=*/true); 5648 } else { 5649 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5650 } 5651 5652 LValue LBLVal = CGF.EmitLValueForField( 5653 Result.TDBase, 5654 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5655 const auto *LBVar = 5656 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5657 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), 5658 LBLVal.getQuals(), 5659 /*IsInitializer=*/true); 5660 LValue UBLVal = CGF.EmitLValueForField( 5661 Result.TDBase, 5662 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5663 const auto *UBVar = 5664 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5665 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), 5666 UBLVal.getQuals(), 5667 /*IsInitializer=*/true); 5668 LValue StLVal = CGF.EmitLValueForField( 5669 Result.TDBase, 5670 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5671 const auto *StVar = 5672 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5673 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), 5674 StLVal.getQuals(), 5675 /*IsInitializer=*/true); 5676 // Store reductions address. 
5677 LValue RedLVal = CGF.EmitLValueForField( 5678 Result.TDBase, 5679 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5680 if (Data.Reductions) { 5681 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5682 } else { 5683 CGF.EmitNullInitialization(RedLVal.getAddress(CGF), 5684 CGF.getContext().VoidPtrTy); 5685 } 5686 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5687 llvm::Value *TaskArgs[] = { 5688 UpLoc, 5689 ThreadID, 5690 Result.NewTask, 5691 IfVal, 5692 LBLVal.getPointer(CGF), 5693 UBLVal.getPointer(CGF), 5694 CGF.EmitLoadOfScalar(StLVal, Loc), 5695 llvm::ConstantInt::getSigned( 5696 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5697 llvm::ConstantInt::getSigned( 5698 CGF.IntTy, Data.Schedule.getPointer() 5699 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5700 : NoSchedule), 5701 Data.Schedule.getPointer() 5702 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5703 /*isSigned=*/false) 5704 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5705 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5706 Result.TaskDupFn, CGF.VoidPtrTy) 5707 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5708 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 5709 } 5710 5711 /// Emit reduction operation for each element of array (required for 5712 /// array sections) LHS op = RHS. 5713 /// \param Type Type of array. 5714 /// \param LHSVar Variable on the left side of the reduction operation 5715 /// (references element of array in original variable). 5716 /// \param RHSVar Variable on the right side of the reduction operation 5717 /// (references element of array in original variable). 5718 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5719 /// RHSVar. 
5720 static void EmitOMPAggregateReduction( 5721 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5722 const VarDecl *RHSVar, 5723 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5724 const Expr *, const Expr *)> &RedOpGen, 5725 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5726 const Expr *UpExpr = nullptr) { 5727 // Perform element-by-element initialization. 5728 QualType ElementTy; 5729 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5730 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5731 5732 // Drill down to the base element type on both arrays. 5733 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5734 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5735 5736 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5737 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5738 // Cast from pointer to array type to pointer to single element. 5739 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5740 // The basic structure here is a while-do loop. 5741 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5742 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5743 llvm::Value *IsEmpty = 5744 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5745 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5746 5747 // Enter the loop body, making that address the current address. 
5748 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5749 CGF.EmitBlock(BodyBB); 5750 5751 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5752 5753 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5754 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5755 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5756 Address RHSElementCurrent = 5757 Address(RHSElementPHI, 5758 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5759 5760 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5761 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5762 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5763 Address LHSElementCurrent = 5764 Address(LHSElementPHI, 5765 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5766 5767 // Emit copy. 5768 CodeGenFunction::OMPPrivateScope Scope(CGF); 5769 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5770 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5771 Scope.Privatize(); 5772 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5773 Scope.ForceCleanup(); 5774 5775 // Shift the address forward by one element. 5776 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5777 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5778 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5779 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5780 // Check whether we've reached the end. 5781 llvm::Value *Done = 5782 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5783 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5784 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5785 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5786 5787 // Done. 5788 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5789 } 5790 5791 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
/// \param CGF Function in which the code is emitted.
/// \param ReductionOp Reduction operation expression to emit.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize a call whose callee is an opaque value wrapping a reference to
  // an OMPDeclareReductionDecl; in that case the opaque callee is bound to the
  // first function returned by getUserDefinedReduction before emission.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the internal function
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg);
/// \endcode
/// which applies every operation of \p ReductionOps to the items reachable
/// through the two pointer arrays passed as arguments.
/// \param Loc Source location used for the function and its parameters.
/// \param ArgsType LLVM type both arguments are cast to.
/// \param Privates Private copies; their types decide whether an item is
/// variably modified and whether it is reduced element by element.
/// \param LHSExprs References to the LHS variables of the operations.
/// \param RHSExprs References to the RHS variables of the operations.
/// \param ReductionOps Reduction operations, one per item.
/// \return The emitted function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the pointer arrays; it advances one extra step for
  // variably modified items because their size occupies the following slot.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The slot holds the size encoded as a pointer; convert it back to an
      // integer and bind it to the VLA size expression.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits a single reduction operation: element by element through
/// EmitOMPAggregateReduction when \p PrivateRef has array type, directly
/// through emitReductionCombiner otherwise.
/// \param CGF Function in which the code is emitted.
/// \param ReductionOp Reduction operation to emit.
/// \param PrivateRef Reference to the private copy; only its type is used.
/// \param LHS Reference to the LHS variable of the operation.
/// \param RHS Reference to the RHS variable of the operation.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code that combines the private reduction copies, following the
/// scheme in the comment at the top of the body: either the plain sequence of
/// reduction operations (\c Options.SimpleReduction) or a runtime-driven
/// switch with a non-atomic case 1 and an atomic/critical case 2.
/// \param CGF Function in which the code is emitted; nothing is emitted if it
/// has no insert point.
/// \param Loc Source location of the construct.
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs References to the LHS variables of the operations.
/// \param RHSExprs References to the RHS variables of the operations.
/// \param ReductionOps Reduction operations, one per item.
/// \param Options Selects the nowait runtime entry points and the simple form.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it runs ahead of I whenever a size slot was
  // inserted for a variably modified item.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No enter call; only the end-reduce call is attached as the exit action.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr: updated location, EExpr: other operand of the update,
      // UpExpr: full update expression. XExpr stays null when the operation
      // is not a plain assignment; the critical-region path is used then.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: inside this 'if' the name BO denotes the BinaryOperator node
      // and shadows the opcode variable declared just above.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback recomputes UpExpr with VD remapped to a temporary
          // that holds the value passed in as XRValue.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // In the nowait form no end-reduce action is installed for case 2.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
/// \param CGM Module used for name mangling and the OpenMP runtime's getName.
/// \param Prefix Text placed in front of the generated name.
/// \param Ref Expression referencing the variable the name is derived from.
/// \return The composed name.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  // Fall back to treating Ref itself as a reference to the variable.
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Local variables and parameters use their plain name, others the mangled
  // name.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the function and its parameter.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \return The emitted function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Otherwise a null pointer is passed as the shared item.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the function and its parameters.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \param ReductionOp Reduction operation emitted as the function body.
/// \param LHS Reference to the variable remapped to the first argument.
/// \param RHS Reference to the variable remapped to the second argument.
/// \param PrivateRef Reference to the private copy; forwarded to
/// emitSingleReductionCombiner.
/// \return The emitted function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy =
CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer to the private reduction item from the single argument.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

// Builds a local array of kmp_task_red_input_t descriptors, one per entry of
// Data.ReductionVars, fills each with the shared item address, its size and
// the init/fini/comb helper functions, and passes the array to
// __kmpc_task_reduction_init. Returns the result of that runtime call, or
// nullptr when there is no insert point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  // Fields are added in the same order as in the struct sketched above.
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  // Number of reduction items, i.e. the number of array elements.
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    // getSizes yields (size in chars, size value); the second member is
    // non-null only for items whose size is not a compile-time constant.
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size in chars, converted to size_t;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // A custom (declare reduction) initializer also forces delayed creation.
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    // The finalizer is optional: emitReduceFiniFunction returns nullptr when
    // the item needs no cleanups, in which case a null pointer is stored.
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

// Publishes, through artificial threadprivate globals, the values that the
// init/comb/fini helpers of reduction item N read back: the item size when it
// is not constant, and the address of the original item when a custom
// reduction initializer is used.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
6553 if (Sizes.second) { 6554 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6555 /*isSigned=*/false); 6556 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6557 CGF, CGM.getContext().getSizeType(), 6558 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6559 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6560 } 6561 // Store address of the original reduction item if custom initializer is used. 6562 if (RCG.usesReductionInitializer(N)) { 6563 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6564 CGF, CGM.getContext().VoidPtrTy, 6565 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6566 CGF.Builder.CreateStore( 6567 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6568 RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy), 6569 SharedAddr, /*IsVolatile=*/false); 6570 } 6571 } 6572 6573 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6574 SourceLocation Loc, 6575 llvm::Value *ReductionsPtr, 6576 LValue SharedLVal) { 6577 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6578 // *d); 6579 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6580 CGM.IntTy, 6581 /*isSigned=*/true), 6582 ReductionsPtr, 6583 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6584 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6585 return Address( 6586 CGF.EmitRuntimeCall( 6587 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6588 SharedLVal.getAlignment()); 6589 } 6590 6591 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6592 SourceLocation Loc) { 6593 if (!CGF.HaveInsertPoint()) 6594 return; 6595 6596 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 6597 if (OMPBuilder) { 6598 OMPBuilder->CreateTaskwait(CGF.Builder); 6599 } else { 6600 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6601 // global_tid); 6602 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), 
getThreadID(CGF, Loc)}; 6603 // Ignore return result until untied tasks are supported. 6604 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6605 } 6606 6607 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6608 Region->emitUntiedSwitch(CGF); 6609 } 6610 6611 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6612 OpenMPDirectiveKind InnerKind, 6613 const RegionCodeGenTy &CodeGen, 6614 bool HasCancel) { 6615 if (!CGF.HaveInsertPoint()) 6616 return; 6617 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6618 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6619 } 6620 6621 namespace { 6622 enum RTCancelKind { 6623 CancelNoreq = 0, 6624 CancelParallel = 1, 6625 CancelLoop = 2, 6626 CancelSections = 3, 6627 CancelTaskgroup = 4 6628 }; 6629 } // anonymous namespace 6630 6631 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6632 RTCancelKind CancelKind = CancelNoreq; 6633 if (CancelRegion == OMPD_parallel) 6634 CancelKind = CancelParallel; 6635 else if (CancelRegion == OMPD_for) 6636 CancelKind = CancelLoop; 6637 else if (CancelRegion == OMPD_sections) 6638 CancelKind = CancelSections; 6639 else { 6640 assert(CancelRegion == OMPD_taskgroup); 6641 CancelKind = CancelTaskgroup; 6642 } 6643 return CancelKind; 6644 } 6645 6646 void CGOpenMPRuntime::emitCancellationPointCall( 6647 CodeGenFunction &CGF, SourceLocation Loc, 6648 OpenMPDirectiveKind CancelRegion) { 6649 if (!CGF.HaveInsertPoint()) 6650 return; 6651 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6652 // global_tid, kmp_int32 cncl_kind); 6653 if (auto *OMPRegionInfo = 6654 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6655 // For 'cancellation point taskgroup', the task region info may not have a 6656 // cancel. This may instead happen in another adjacent task. 
if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below: a non-null value branches to the
      // cancel exit block.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      // The jump destination is looked up by the directive kind of the
      // enclosing region, and the branch runs any pending cleanups.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

// Emits a call to __kmpc_cancel for the given CancelRegion and, when the call
// returns a non-null value, a branch to the cancel destination of the
// enclosing OpenMP region. If IfCond is non-null the call is emitted under
// that condition. Nothing is emitted when there is no insert point or the
// current captured-statement info is not an OpenMP region.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Generator for the "condition holds" path; it is either run directly or
    // handed to emitIfClause.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below: a non-null value branches to the
      // cancel exit block.
6697 llvm::Value *Result = CGF.EmitRuntimeCall( 6698 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6699 // if (__kmpc_cancel()) { 6700 // exit from construct; 6701 // } 6702 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6703 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6704 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6705 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6706 CGF.EmitBlock(ExitBB); 6707 // exit from construct; 6708 CodeGenFunction::JumpDest CancelDest = 6709 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6710 CGF.EmitBranchThroughCleanup(CancelDest); 6711 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6712 }; 6713 if (IfCond) { 6714 emitIfClause(CGF, IfCond, ThenGen, 6715 [](CodeGenFunction &, PrePostActionTy &) {}); 6716 } else { 6717 RegionCodeGenTy ThenRCG(ThenGen); 6718 ThenRCG(CGF); 6719 } 6720 } 6721 } 6722 6723 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6724 const OMPExecutableDirective &D, StringRef ParentName, 6725 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6726 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6727 assert(!ParentName.empty() && "Invalid target region parent name!"); 6728 HasEmittedTargetRegion = true; 6729 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6730 IsOffloadEntry, CodeGen); 6731 } 6732 6733 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6734 const OMPExecutableDirective &D, StringRef ParentName, 6735 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6736 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6737 // Create a unique name for the entry function using the source location 6738 // information of the current target region. 
The name will be something like: 6739 // 6740 // __omp_offloading_DD_FFFF_PP_lBB 6741 // 6742 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6743 // mangled name of the function that encloses the target region and BB is the 6744 // line number of the target region. 6745 6746 unsigned DeviceID; 6747 unsigned FileID; 6748 unsigned Line; 6749 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6750 Line); 6751 SmallString<64> EntryFnName; 6752 { 6753 llvm::raw_svector_ostream OS(EntryFnName); 6754 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6755 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6756 } 6757 6758 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6759 6760 CodeGenFunction CGF(CGM, true); 6761 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6762 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6763 6764 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6765 6766 // If this target outline function is not an offload entry, we don't need to 6767 // register it. 6768 if (!IsOffloadEntry) 6769 return; 6770 6771 // The target region ID is used by the runtime library to identify the current 6772 // target region, so it only has to be unique and not necessarily point to 6773 // anything. It could be the pointer to the outlined function that implements 6774 // the target region, but we aren't using that so that the compiler doesn't 6775 // need to keep that, and could therefore inline the host function if proven 6776 // worthwhile during optimization. In the other hand, if emitting code for the 6777 // device, the ID has to be the function address so that it can retrieved from 6778 // the offloading entry and launched by the runtime library. We also mark the 6779 // outlined function to have external linkage in case we are emitting code for 6780 // the device, because these functions will be entry points to the device. 
6781 6782 if (CGM.getLangOpts().OpenMPIsDevice) { 6783 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6784 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6785 OutlinedFn->setDSOLocal(false); 6786 } else { 6787 std::string Name = getName({EntryFnName, "region_id"}); 6788 OutlinedFnID = new llvm::GlobalVariable( 6789 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6790 llvm::GlobalValue::WeakAnyLinkage, 6791 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6792 } 6793 6794 // Register the information for the entry associated with this target region. 6795 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6796 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6797 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6798 } 6799 6800 /// Checks if the expression is constant or does not have non-trivial function 6801 /// calls. 6802 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6803 // We can skip constant expressions. 6804 // We can skip expressions with trivial calls or simple expressions. 6805 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6806 !E->hasNonTrivialCall(Ctx)) && 6807 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6808 } 6809 6810 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6811 const Stmt *Body) { 6812 const Stmt *Child = Body->IgnoreContainers(); 6813 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6814 Child = nullptr; 6815 for (const Stmt *S : C->body()) { 6816 if (const auto *E = dyn_cast<Expr>(S)) { 6817 if (isTrivial(Ctx, E)) 6818 continue; 6819 } 6820 // Some of the statements can be ignored. 6821 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6822 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6823 continue; 6824 // Analyze declarations. 
if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement is skipped only if every declaration in it
        // is ignorable according to the predicate below.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              // Declaration kinds that are always ignored.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              // Any other declaration that is not a variable is significant.
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // A variable is ignorable if it is constexpr, or if it has a
              // trivial or reference type and either no initializer or a
              // trivial one.
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Strip containers from the single child; if it is again a compound
    // statement the outer loop descends into it.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a single directive nested directly inside the
    // region and derive the number of teams from it.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression with the captures of
          // the target region installed.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams directive without a num_teams clause.
        return Bld.getInt32(0);
      }
      // A nested parallel or simd directive yields a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target teams directives carry the num_teams clause themselves.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct is associated with these directives: one team.
    return Bld.getInt32(1);
  // Every remaining kind falls through to the llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Computes the number of threads implied by the single directive nested in
/// \p CS, optionally bounded by \p DefaultThreadLimitVal (which may be null).
///
/// For a nested parallel directive the result combines its 'if' clause (with
/// no name modifier or the 'parallel' modifier) and its 'num_threads' clause;
/// for a nested simd directive it is 1; for any other nested directive it is
/// \p DefaultThreadLimitVal as passed in, which may be null. When no single
/// nested directive is found the result is \p DefaultThreadLimitVal if
/// non-null and the constant 0 otherwise.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the first if clause that applies to the parallel region.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: a false condition means exactly
            // one thread; a true condition needs no runtime check.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Runtime condition: emit the clause's pre-init declarations,
            // then the condition itself.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Marked OMPCaptureNoInit: allocate and register cleanups
                  // only, without emitting an initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp to the thread limit: unsigned min(limit, num_threads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: use the thread limit if there is one,
        // otherwise the constant 0.
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    // Some other nested directive: pass the limit through unchanged (this may
    // be null).
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
7078 /// 7079 /// Emit the num_threads clause for directives such as 'target parallel' that 7080 /// have no associated teams construct. 7081 /// 7082 /// Otherwise, return nullptr. 7083 static llvm::Value * 7084 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 7085 const OMPExecutableDirective &D) { 7086 assert(!CGF.getLangOpts().OpenMPIsDevice && 7087 "Clauses associated with the teams directive expected to be emitted " 7088 "only for the host!"); 7089 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 7090 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 7091 "Expected target-based executable directive."); 7092 CGBuilderTy &Bld = CGF.Builder; 7093 llvm::Value *ThreadLimitVal = nullptr; 7094 llvm::Value *NumThreadsVal = nullptr; 7095 switch (DirectiveKind) { 7096 case OMPD_target: { 7097 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7098 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7099 return NumThreads; 7100 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7101 CGF.getContext(), CS->getCapturedStmt()); 7102 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7103 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 7104 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 7105 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 7106 const auto *ThreadLimitClause = 7107 Dir->getSingleClause<OMPThreadLimitClause>(); 7108 CodeGenFunction::LexicalScope Scope( 7109 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 7110 if (const auto *PreInit = 7111 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 7112 for (const auto *I : PreInit->decls()) { 7113 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 7114 CGF.EmitVarDecl(cast<VarDecl>(*I)); 7115 } else { 7116 CodeGenFunction::AutoVarEmission Emission = 7117 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 7118 CGF.EmitAutoVarCleanups(Emission); 7119 } 7120 } 7121 } 7122 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7123 
ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7124 ThreadLimitVal = 7125 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7126 } 7127 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 7128 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 7129 CS = Dir->getInnermostCapturedStmt(); 7130 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7131 CGF.getContext(), CS->getCapturedStmt()); 7132 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 7133 } 7134 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 7135 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 7136 CS = Dir->getInnermostCapturedStmt(); 7137 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7138 return NumThreads; 7139 } 7140 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 7141 return Bld.getInt32(1); 7142 } 7143 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 7144 } 7145 case OMPD_target_teams: { 7146 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7147 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7148 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7149 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7150 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7151 ThreadLimitVal = 7152 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7153 } 7154 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 7155 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7156 return NumThreads; 7157 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 7158 CGF.getContext(), CS->getCapturedStmt()); 7159 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 7160 if (Dir->getDirectiveKind() == OMPD_distribute) { 7161 CS = Dir->getInnermostCapturedStmt(); 7162 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 7163 return NumThreads; 7164 } 7165 } 7166 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 7167 } 7168 case OMPD_target_teams_distribute: 7169 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7170 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7171 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7172 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7173 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7174 ThreadLimitVal = 7175 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 7176 } 7177 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 7178 case OMPD_target_parallel: 7179 case OMPD_target_parallel_for: 7180 case OMPD_target_parallel_for_simd: 7181 case OMPD_target_teams_distribute_parallel_for: 7182 case OMPD_target_teams_distribute_parallel_for_simd: { 7183 llvm::Value *CondVal = nullptr; 7184 // Handle if clause. If if clause present, the number of threads is 7185 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 7186 if (D.hasClausesOfKind<OMPIfClause>()) { 7187 const OMPIfClause *IfClause = nullptr; 7188 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 7189 if (C->getNameModifier() == OMPD_unknown || 7190 C->getNameModifier() == OMPD_parallel) { 7191 IfClause = C; 7192 break; 7193 } 7194 } 7195 if (IfClause) { 7196 const Expr *Cond = IfClause->getCondition(); 7197 bool Result; 7198 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 7199 if (!Result) 7200 return Bld.getInt32(1); 7201 } else { 7202 CodeGenFunction::RunCleanupsScope Scope(CGF); 7203 CondVal = CGF.EvaluateExprAsBool(Cond); 7204 } 7205 } 7206 } 7207 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7208 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7209 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7210 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7211 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7212 ThreadLimitVal = 7213 Bld.CreateIntCast(ThreadLimit, 
CGF.Int32Ty, /*isSigned=*/false); 7214 } 7215 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7216 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7217 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7218 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7219 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7220 NumThreadsVal = 7221 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7222 ThreadLimitVal = ThreadLimitVal 7223 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7224 ThreadLimitVal), 7225 NumThreadsVal, ThreadLimitVal) 7226 : NumThreadsVal; 7227 } 7228 if (!ThreadLimitVal) 7229 ThreadLimitVal = Bld.getInt32(0); 7230 if (CondVal) 7231 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7232 return ThreadLimitVal; 7233 } 7234 case OMPD_target_teams_distribute_simd: 7235 case OMPD_target_simd: 7236 return Bld.getInt32(1); 7237 case OMPD_parallel: 7238 case OMPD_for: 7239 case OMPD_parallel_for: 7240 case OMPD_parallel_master: 7241 case OMPD_parallel_sections: 7242 case OMPD_for_simd: 7243 case OMPD_parallel_for_simd: 7244 case OMPD_cancel: 7245 case OMPD_cancellation_point: 7246 case OMPD_ordered: 7247 case OMPD_threadprivate: 7248 case OMPD_allocate: 7249 case OMPD_task: 7250 case OMPD_simd: 7251 case OMPD_sections: 7252 case OMPD_section: 7253 case OMPD_single: 7254 case OMPD_master: 7255 case OMPD_critical: 7256 case OMPD_taskyield: 7257 case OMPD_barrier: 7258 case OMPD_taskwait: 7259 case OMPD_taskgroup: 7260 case OMPD_atomic: 7261 case OMPD_flush: 7262 case OMPD_depobj: 7263 case OMPD_teams: 7264 case OMPD_target_data: 7265 case OMPD_target_exit_data: 7266 case OMPD_target_enter_data: 7267 case OMPD_distribute: 7268 case OMPD_distribute_simd: 7269 case OMPD_distribute_parallel_for: 7270 case OMPD_distribute_parallel_for_simd: 7271 case OMPD_teams_distribute: 7272 case OMPD_teams_distribute_simd: 7273 case OMPD_teams_distribute_parallel_for: 7274 case 
OMPD_teams_distribute_parallel_for_simd: 7275 case OMPD_target_update: 7276 case OMPD_declare_simd: 7277 case OMPD_declare_variant: 7278 case OMPD_declare_target: 7279 case OMPD_end_declare_target: 7280 case OMPD_declare_reduction: 7281 case OMPD_declare_mapper: 7282 case OMPD_taskloop: 7283 case OMPD_taskloop_simd: 7284 case OMPD_master_taskloop: 7285 case OMPD_master_taskloop_simd: 7286 case OMPD_parallel_master_taskloop: 7287 case OMPD_parallel_master_taskloop_simd: 7288 case OMPD_requires: 7289 case OMPD_unknown: 7290 break; 7291 } 7292 llvm_unreachable("Unsupported directive kind."); 7293 } 7294 7295 namespace { 7296 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7297 7298 // Utility to handle information from clauses associated with a given 7299 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7300 // It provides a convenient interface to obtain the information and generate 7301 // code for that information. 7302 class MappableExprsHandler { 7303 public: 7304 /// Values for bit flags used to specify the mapping type for 7305 /// offloading. 7306 enum OpenMPOffloadMappingFlags : uint64_t { 7307 /// No flags 7308 OMP_MAP_NONE = 0x0, 7309 /// Allocate memory on the device and move data from host to device. 7310 OMP_MAP_TO = 0x01, 7311 /// Allocate memory on the device and move data from device to host. 7312 OMP_MAP_FROM = 0x02, 7313 /// Always perform the requested mapping action on the element, even 7314 /// if it was already mapped before. 7315 OMP_MAP_ALWAYS = 0x04, 7316 /// Delete the element from the device environment, ignoring the 7317 /// current reference count associated with the element. 7318 OMP_MAP_DELETE = 0x08, 7319 /// The element being mapped is a pointer-pointee pair; both the 7320 /// pointer and the pointee should be mapped. 7321 OMP_MAP_PTR_AND_OBJ = 0x10, 7322 /// This flags signals that the base address of an entry should be 7323 /// passed to the target kernel as an argument. 
7324 OMP_MAP_TARGET_PARAM = 0x20, 7325 /// Signal that the runtime library has to return the device pointer 7326 /// in the current position for the data being mapped. Used when we have the 7327 /// use_device_ptr clause. 7328 OMP_MAP_RETURN_PARAM = 0x40, 7329 /// This flag signals that the reference being passed is a pointer to 7330 /// private data. 7331 OMP_MAP_PRIVATE = 0x80, 7332 /// Pass the element to the device by value. 7333 OMP_MAP_LITERAL = 0x100, 7334 /// Implicit map 7335 OMP_MAP_IMPLICIT = 0x200, 7336 /// Close is a hint to the runtime to allocate memory close to 7337 /// the target device. 7338 OMP_MAP_CLOSE = 0x400, 7339 /// The 16 MSBs of the flags indicate whether the entry is member of some 7340 /// struct/class. 7341 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7342 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7343 }; 7344 7345 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7346 static unsigned getFlagMemberOffset() { 7347 unsigned Offset = 0; 7348 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7349 Remain = Remain >> 1) 7350 Offset++; 7351 return Offset; 7352 } 7353 7354 /// Class that associates information with a base pointer to be passed to the 7355 /// runtime library. 7356 class BasePointerInfo { 7357 /// The base pointer. 7358 llvm::Value *Ptr = nullptr; 7359 /// The base declaration that refers to this device pointer, or null if 7360 /// there is none. 
7361 const ValueDecl *DevPtrDecl = nullptr; 7362 7363 public: 7364 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7365 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7366 llvm::Value *operator*() const { return Ptr; } 7367 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7368 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7369 }; 7370 7371 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7372 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7373 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7374 7375 /// Map between a struct and the its lowest & highest elements which have been 7376 /// mapped. 7377 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7378 /// HE(FieldIndex, Pointer)} 7379 struct StructRangeInfoTy { 7380 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7381 0, Address::invalid()}; 7382 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7383 0, Address::invalid()}; 7384 Address Base = Address::invalid(); 7385 }; 7386 7387 private: 7388 /// Kind that defines how a device pointer has to be returned. 
struct MapInfo {
    /// Component list of the mappable expression this entry describes.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type of the entry; OMPC_MAP_unknown when default-constructed.
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Map-type modifiers. ArrayRef is a non-owning view, so the storage it
    /// refers to has to outlive this object.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// NOTE(review): presumably set when the device pointer has to be
    /// returned for this entry (cf. OMP_MAP_RETURN_PARAM) - confirm against
    /// the users of this struct.
    bool ReturnDevicePointer = false;
    /// NOTE(review): presumably set for maps that were not spelled out by the
    /// user (cf. OMP_MAP_IMPLICIT) - confirm against the users of this struct.
    bool IsImplicit = false;

    /// Leaves every field at its in-class default.
    MapInfo() = default;
    /// Stores each argument in the member of the same name.
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// Expression of the deferred entry.
    /// NOTE(review): the exact meaning of "IE" is not evident from this
    /// struct alone - confirm at the point where entries are created.
    const Expr *IE = nullptr;
    /// Declaration associated with the deferred entry.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
7433 llvm::DenseMap< 7434 const ValueDecl *, 7435 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7436 DevPointersMap; 7437 7438 llvm::Value *getExprTypeSize(const Expr *E) const { 7439 QualType ExprTy = E->getType().getCanonicalType(); 7440 7441 // Reference types are ignored for mapping purposes. 7442 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7443 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7444 7445 // Given that an array section is considered a built-in type, we need to 7446 // do the calculation based on the length of the section instead of relying 7447 // on CGF.getTypeSize(E->getType()). 7448 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7449 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7450 OAE->getBase()->IgnoreParenImpCasts()) 7451 .getCanonicalType(); 7452 7453 // If there is no length associated with the expression and lower bound is 7454 // not specified too, that means we are using the whole length of the 7455 // base. 7456 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7457 !OAE->getLowerBound()) 7458 return CGF.getTypeSize(BaseTy); 7459 7460 llvm::Value *ElemSize; 7461 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7462 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7463 } else { 7464 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7465 assert(ATy && "Expecting array type if not a pointer type."); 7466 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7467 } 7468 7469 // If we don't have a length at this point, that is because we have an 7470 // array section with a single element. 
7471 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7472 return ElemSize; 7473 7474 if (const Expr *LenExpr = OAE->getLength()) { 7475 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7476 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7477 CGF.getContext().getSizeType(), 7478 LenExpr->getExprLoc()); 7479 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7480 } 7481 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7482 OAE->getLowerBound() && "expected array_section[lb:]."); 7483 // Size = sizetype - lb * elemtype; 7484 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7485 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7486 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7487 CGF.getContext().getSizeType(), 7488 OAE->getLowerBound()->getExprLoc()); 7489 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7490 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7491 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7492 LengthVal = CGF.Builder.CreateSelect( 7493 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7494 return LengthVal; 7495 } 7496 return CGF.getTypeSize(ExprTy); 7497 } 7498 7499 /// Return the corresponding bits for a given map clause modifier. Add 7500 /// a flag marking the map as a pointer if requested. Add a flag marking the 7501 /// map as the first one of a series of maps that relate to the same map 7502 /// expression. 7503 OpenMPOffloadMappingFlags getMapTypeBits( 7504 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7505 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7506 OpenMPOffloadMappingFlags Bits = 7507 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7508 switch (MapType) { 7509 case OMPC_MAP_alloc: 7510 case OMPC_MAP_release: 7511 // alloc and release is the default behavior in the runtime library, i.e. 
7512 // if we don't pass any bits alloc/release that is what the runtime is 7513 // going to do. Therefore, we don't need to signal anything for these two 7514 // type modifiers. 7515 break; 7516 case OMPC_MAP_to: 7517 Bits |= OMP_MAP_TO; 7518 break; 7519 case OMPC_MAP_from: 7520 Bits |= OMP_MAP_FROM; 7521 break; 7522 case OMPC_MAP_tofrom: 7523 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7524 break; 7525 case OMPC_MAP_delete: 7526 Bits |= OMP_MAP_DELETE; 7527 break; 7528 case OMPC_MAP_unknown: 7529 llvm_unreachable("Unexpected map type!"); 7530 } 7531 if (AddPtrFlag) 7532 Bits |= OMP_MAP_PTR_AND_OBJ; 7533 if (AddIsTargetParamFlag) 7534 Bits |= OMP_MAP_TARGET_PARAM; 7535 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7536 != MapModifiers.end()) 7537 Bits |= OMP_MAP_ALWAYS; 7538 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7539 != MapModifiers.end()) 7540 Bits |= OMP_MAP_CLOSE; 7541 return Bits; 7542 } 7543 7544 /// Return true if the provided expression is a final array section. A 7545 /// final array section, is one whose length can't be proved to be one. 7546 bool isFinalArraySectionExpression(const Expr *E) const { 7547 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7548 7549 // It is not an array section and therefore not a unity-size one. 7550 if (!OASE) 7551 return false; 7552 7553 // An array section with no colon always refer to a single element. 7554 if (OASE->getColonLoc().isInvalid()) 7555 return false; 7556 7557 const Expr *Length = OASE->getLength(); 7558 7559 // If we don't have a length we have to check if the array has size 1 7560 // for this dimension. Also, we should always expect a length if the 7561 // base type is pointer. 
7562 if (!Length) { 7563 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7564 OASE->getBase()->IgnoreParenImpCasts()) 7565 .getCanonicalType(); 7566 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7567 return ATy->getSize().getSExtValue() != 1; 7568 // If we don't have a constant dimension length, we have to consider 7569 // the current section as having any size, so it is not necessarily 7570 // unitary. If it happen to be unity size, that's user fault. 7571 return true; 7572 } 7573 7574 // Check if the length evaluates to 1. 7575 Expr::EvalResult Result; 7576 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7577 return true; // Can have more that size 1. 7578 7579 llvm::APSInt ConstLength = Result.Val.getInt(); 7580 return ConstLength.getSExtValue() != 1; 7581 } 7582 7583 /// Generate the base pointers, section pointers, sizes and map type 7584 /// bits for the provided map type, map modifier, and expression components. 7585 /// \a IsFirstComponent should be set to true if the provided set of 7586 /// components is the first associated with a capture. 7587 void generateInfoForComponentList( 7588 OpenMPMapClauseKind MapType, 7589 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7590 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7591 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7592 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7593 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7594 bool IsImplicit, 7595 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7596 OverlappedElements = llvm::None) const { 7597 // The following summarizes what has to be generated for each map and the 7598 // types below. The generated information is expressed in this order: 7599 // base pointer, section pointer, size, flags 7600 // (to add to the ones that come from the map type and modifier). 
7601 // 7602 // double d; 7603 // int i[100]; 7604 // float *p; 7605 // 7606 // struct S1 { 7607 // int i; 7608 // float f[50]; 7609 // } 7610 // struct S2 { 7611 // int i; 7612 // float f[50]; 7613 // S1 s; 7614 // double *p; 7615 // struct S2 *ps; 7616 // } 7617 // S2 s; 7618 // S2 *ps; 7619 // 7620 // map(d) 7621 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7622 // 7623 // map(i) 7624 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7625 // 7626 // map(i[1:23]) 7627 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7628 // 7629 // map(p) 7630 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7631 // 7632 // map(p[1:24]) 7633 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7634 // 7635 // map(s) 7636 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7637 // 7638 // map(s.i) 7639 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7640 // 7641 // map(s.s.f) 7642 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7643 // 7644 // map(s.p) 7645 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7646 // 7647 // map(to: s.p[:22]) 7648 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7649 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7650 // &(s.p), &(s.p[0]), 22*sizeof(double), 7651 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7652 // (*) alloc space for struct members, only this is a target parameter 7653 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7654 // optimizes this entry out, same in the examples below) 7655 // (***) map the pointee (map: to) 7656 // 7657 // map(s.ps) 7658 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7659 // 7660 // map(from: s.ps->s.i) 7661 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7662 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7663 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7664 // 7665 // map(to: s.ps->ps) 7666 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7667 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7668 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7669 // 7670 // map(s.ps->ps->ps) 7671 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7672 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7673 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7674 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7675 // 7676 // map(to: s.ps->ps->s.f[:22]) 7677 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7678 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7679 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7680 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7681 // 7682 // map(ps) 7683 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7684 // 7685 // map(ps->i) 7686 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7687 // 7688 // map(ps->s.f) 7689 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7690 // 7691 // map(from: ps->p) 7692 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7693 // 7694 // map(to: ps->p[:22]) 7695 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7696 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7697 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7698 // 7699 // map(ps->ps) 7700 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7701 // 7702 // map(from: ps->ps->s.i) 7703 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7704 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7705 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7706 // 7707 // map(from: ps->ps->ps) 7708 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7709 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7710 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7711 // 7712 // map(ps->ps->ps->ps) 7713 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7714 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7715 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7716 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7717 // 7718 // map(to: ps->ps->ps->s.f[:22]) 7719 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7720 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7721 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7722 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7723 // 7724 // map(to: s.f[:22]) map(from: s.p[:33]) 7725 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7726 // sizeof(double*) (**), TARGET_PARAM 7727 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7728 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7729 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7730 // (*) allocate contiguous space needed to fit all mapped members even if 7731 // we allocate space for members not mapped (in this example, 7732 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7733 // them as well because they fall between &s.f[0] and &s.p) 7734 // 7735 // map(from: s.f[:22]) map(to: ps->p[:33]) 7736 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7737 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7738 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7739 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7740 // (*) the struct this entry pertains to is the 2nd element in the list of 7741 // arguments, hence MEMBER_OF(2) 7742 // 7743 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7744 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7745 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7746 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7747 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7748 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7749 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7750 // (*) the struct this entry pertains to is the 4th element in the list 7751 // of arguments, hence MEMBER_OF(4) 7752 7753 // Track if the map information being generated is the first for a 
capture. 7754 bool IsCaptureFirstInfo = IsFirstComponentList; 7755 // When the variable is on a declare target link or in a to clause with 7756 // unified memory, a reference is needed to hold the host/device address 7757 // of the variable. 7758 bool RequiresReference = false; 7759 7760 // Scan the components from the base to the complete expression. 7761 auto CI = Components.rbegin(); 7762 auto CE = Components.rend(); 7763 auto I = CI; 7764 7765 // Track if the map information being generated is the first for a list of 7766 // components. 7767 bool IsExpressionFirstInfo = true; 7768 Address BP = Address::invalid(); 7769 const Expr *AssocExpr = I->getAssociatedExpression(); 7770 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7771 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7772 7773 if (isa<MemberExpr>(AssocExpr)) { 7774 // The base is the 'this' pointer. The content of the pointer is going 7775 // to be the base of the field being mapped. 7776 BP = CGF.LoadCXXThisAddress(); 7777 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7778 (OASE && 7779 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7780 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7781 } else { 7782 // The base is the reference to the variable. 7783 // BP = &Var. 7784 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7785 if (const auto *VD = 7786 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7787 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7788 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7789 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7790 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7791 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7792 RequiresReference = true; 7793 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7794 } 7795 } 7796 } 7797 7798 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7799 // the last component), the base has to be the pointer itself, not its 7800 // reference. References are ignored for mapping purposes. 7801 QualType Ty = 7802 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7803 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7804 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7805 7806 // We do not need to generate individual map information for the 7807 // pointer, it can be associated with the combined storage. 7808 ++I; 7809 } 7810 } 7811 7812 // Track whether a component of the list should be marked as MEMBER_OF some 7813 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7814 // in a component list should be marked as MEMBER_OF, all subsequent entries 7815 // do not belong to the base struct. E.g. 7816 // struct S2 s; 7817 // s.ps->ps->ps->f[:] 7818 // (1) (2) (3) (4) 7819 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7820 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7821 // is the pointee of ps(2) which is not member of struct s, so it should not 7822 // be marked as such (it is still PTR_AND_OBJ). 7823 // The variable is initialized to false so that PTR_AND_OBJ entries which 7824 // are not struct members are not considered (e.g. array of pointers to 7825 // data). 7826 bool ShouldBeMemberOf = false; 7827 7828 // Variable keeping track of whether or not we have encountered a component 7829 // in the component list which is a member expression. Useful when we have a 7830 // pointer or a final array section, in which case it is the previous 7831 // component in the list which tells us whether we have a member expression. 7832 // E.g. X.f[:] 7833 // While processing the final array section "[:]" it is "f" which tells us 7834 // whether we are dealing with a member of a declared struct. 
7835 const MemberExpr *EncounteredME = nullptr; 7836 7837 for (; I != CE; ++I) { 7838 // If the current component is member of a struct (parent struct) mark it. 7839 if (!EncounteredME) { 7840 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7841 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7842 // as MEMBER_OF the parent struct. 7843 if (EncounteredME) 7844 ShouldBeMemberOf = true; 7845 } 7846 7847 auto Next = std::next(I); 7848 7849 // We need to generate the addresses and sizes if this is the last 7850 // component, if the component is a pointer or if it is an array section 7851 // whose length can't be proved to be one. If this is a pointer, it 7852 // becomes the base address for the following components. 7853 7854 // A final array section, is one whose length can't be proved to be one. 7855 bool IsFinalArraySection = 7856 isFinalArraySectionExpression(I->getAssociatedExpression()); 7857 7858 // Get information on whether the element is a pointer. Have to do a 7859 // special treatment for array sections given that they are built-in 7860 // types. 7861 const auto *OASE = 7862 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7863 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); 7864 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); 7865 bool IsPointer = 7866 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7867 .getCanonicalType() 7868 ->isAnyPointerType()) || 7869 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7870 bool IsNonDerefPointer = IsPointer && !UO && !BO; 7871 7872 if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { 7873 // If this is not the last component, we expect the pointer to be 7874 // associated with an array expression or member expression. 
7875 assert((Next == CE || 7876 isa<MemberExpr>(Next->getAssociatedExpression()) || 7877 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7878 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || 7879 isa<UnaryOperator>(Next->getAssociatedExpression()) || 7880 isa<BinaryOperator>(Next->getAssociatedExpression())) && 7881 "Unexpected expression"); 7882 7883 Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7884 .getAddress(CGF); 7885 7886 // If this component is a pointer inside the base struct then we don't 7887 // need to create any entry for it - it will be combined with the object 7888 // it is pointing to into a single PTR_AND_OBJ entry. 7889 bool IsMemberPointer = 7890 IsPointer && EncounteredME && 7891 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7892 EncounteredME); 7893 if (!OverlappedElements.empty()) { 7894 // Handle base element with the info for overlapped elements. 7895 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7896 assert(Next == CE && 7897 "Expected last element for the overlapped elements."); 7898 assert(!IsPointer && 7899 "Unexpected base element with the pointer type."); 7900 // Mark the whole struct as the struct that requires allocation on the 7901 // device. 7902 PartialStruct.LowestElem = {0, LB}; 7903 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7904 I->getAssociatedExpression()->getType()); 7905 Address HB = CGF.Builder.CreateConstGEP( 7906 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7907 CGF.VoidPtrTy), 7908 TypeSize.getQuantity() - 1); 7909 PartialStruct.HighestElem = { 7910 std::numeric_limits<decltype( 7911 PartialStruct.HighestElem.first)>::max(), 7912 HB}; 7913 PartialStruct.Base = BP; 7914 // Emit data for non-overlapped data. 
7915 OpenMPOffloadMappingFlags Flags = 7916 OMP_MAP_MEMBER_OF | 7917 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7918 /*AddPtrFlag=*/false, 7919 /*AddIsTargetParamFlag=*/false); 7920 LB = BP; 7921 llvm::Value *Size = nullptr; 7922 // Do bitcopy of all non-overlapped structure elements. 7923 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7924 Component : OverlappedElements) { 7925 Address ComponentLB = Address::invalid(); 7926 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7927 Component) { 7928 if (MC.getAssociatedDeclaration()) { 7929 ComponentLB = 7930 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7931 .getAddress(CGF); 7932 Size = CGF.Builder.CreatePtrDiff( 7933 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7934 CGF.EmitCastToVoidPtr(LB.getPointer())); 7935 break; 7936 } 7937 } 7938 BasePointers.push_back(BP.getPointer()); 7939 Pointers.push_back(LB.getPointer()); 7940 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7941 /*isSigned=*/true)); 7942 Types.push_back(Flags); 7943 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7944 } 7945 BasePointers.push_back(BP.getPointer()); 7946 Pointers.push_back(LB.getPointer()); 7947 Size = CGF.Builder.CreatePtrDiff( 7948 CGF.EmitCastToVoidPtr( 7949 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7950 CGF.EmitCastToVoidPtr(LB.getPointer())); 7951 Sizes.push_back( 7952 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7953 Types.push_back(Flags); 7954 break; 7955 } 7956 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7957 if (!IsMemberPointer) { 7958 BasePointers.push_back(BP.getPointer()); 7959 Pointers.push_back(LB.getPointer()); 7960 Sizes.push_back( 7961 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7962 7963 // We need to add a pointer flag for each map that comes from the 7964 // same expression except for the first one. 
We also need to signal 7965 // this map is the first one that relates with the current capture 7966 // (there is a set of entries for each capture). 7967 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7968 MapType, MapModifiers, IsImplicit, 7969 !IsExpressionFirstInfo || RequiresReference, 7970 IsCaptureFirstInfo && !RequiresReference); 7971 7972 if (!IsExpressionFirstInfo) { 7973 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7974 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7975 if (IsPointer) 7976 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7977 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7978 7979 if (ShouldBeMemberOf) { 7980 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7981 // should be later updated with the correct value of MEMBER_OF. 7982 Flags |= OMP_MAP_MEMBER_OF; 7983 // From now on, all subsequent PTR_AND_OBJ entries should not be 7984 // marked as MEMBER_OF. 7985 ShouldBeMemberOf = false; 7986 } 7987 } 7988 7989 Types.push_back(Flags); 7990 } 7991 7992 // If we have encountered a member expression so far, keep track of the 7993 // mapped member. If the parent is "*this", then the value declaration 7994 // is nullptr. 7995 if (EncounteredME) { 7996 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl()); 7997 unsigned FieldIndex = FD->getFieldIndex(); 7998 7999 // Update info about the lowest and highest elements for this struct 8000 if (!PartialStruct.Base.isValid()) { 8001 PartialStruct.LowestElem = {FieldIndex, LB}; 8002 PartialStruct.HighestElem = {FieldIndex, LB}; 8003 PartialStruct.Base = BP; 8004 } else if (FieldIndex < PartialStruct.LowestElem.first) { 8005 PartialStruct.LowestElem = {FieldIndex, LB}; 8006 } else if (FieldIndex > PartialStruct.HighestElem.first) { 8007 PartialStruct.HighestElem = {FieldIndex, LB}; 8008 } 8009 } 8010 8011 // If we have a final array section, we are done with this expression. 
8012 if (IsFinalArraySection) 8013 break; 8014 8015 // The pointer becomes the base for the next element. 8016 if (Next != CE) 8017 BP = LB; 8018 8019 IsExpressionFirstInfo = false; 8020 IsCaptureFirstInfo = false; 8021 } 8022 } 8023 } 8024 8025 /// Return the adjusted map modifiers if the declaration a capture refers to 8026 /// appears in a first-private clause. This is expected to be used only with 8027 /// directives that start with 'target'. 8028 MappableExprsHandler::OpenMPOffloadMappingFlags 8029 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 8030 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 8031 8032 // A first private variable captured by reference will use only the 8033 // 'private ptr' and 'map to' flag. Return the right flags if the captured 8034 // declaration is known as first-private in this handler. 8035 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 8036 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 8037 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 8038 return MappableExprsHandler::OMP_MAP_ALWAYS | 8039 MappableExprsHandler::OMP_MAP_TO; 8040 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 8041 return MappableExprsHandler::OMP_MAP_TO | 8042 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 8043 return MappableExprsHandler::OMP_MAP_PRIVATE | 8044 MappableExprsHandler::OMP_MAP_TO; 8045 } 8046 return MappableExprsHandler::OMP_MAP_TO | 8047 MappableExprsHandler::OMP_MAP_FROM; 8048 } 8049 8050 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 8051 // Rotate by getFlagMemberOffset() bits. 
8052 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 8053 << getFlagMemberOffset()); 8054 } 8055 8056 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 8057 OpenMPOffloadMappingFlags MemberOfFlag) { 8058 // If the entry is PTR_AND_OBJ but has not been marked with the special 8059 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 8060 // marked as MEMBER_OF. 8061 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 8062 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 8063 return; 8064 8065 // Reset the placeholder value to prepare the flag for the assignment of the 8066 // proper MEMBER_OF value. 8067 Flags &= ~OMP_MAP_MEMBER_OF; 8068 Flags |= MemberOfFlag; 8069 } 8070 8071 void getPlainLayout(const CXXRecordDecl *RD, 8072 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 8073 bool AsBase) const { 8074 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 8075 8076 llvm::StructType *St = 8077 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 8078 8079 unsigned NumElements = St->getNumElements(); 8080 llvm::SmallVector< 8081 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 8082 RecordLayout(NumElements); 8083 8084 // Fill bases. 8085 for (const auto &I : RD->bases()) { 8086 if (I.isVirtual()) 8087 continue; 8088 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8089 // Ignore empty bases. 8090 if (Base->isEmpty() || CGF.getContext() 8091 .getASTRecordLayout(Base) 8092 .getNonVirtualSize() 8093 .isZero()) 8094 continue; 8095 8096 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 8097 RecordLayout[FieldIndex] = Base; 8098 } 8099 // Fill in virtual bases. 8100 for (const auto &I : RD->vbases()) { 8101 const auto *Base = I.getType()->getAsCXXRecordDecl(); 8102 // Ignore empty bases. 
8103 if (Base->isEmpty()) 8104 continue; 8105 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 8106 if (RecordLayout[FieldIndex]) 8107 continue; 8108 RecordLayout[FieldIndex] = Base; 8109 } 8110 // Fill in all the fields. 8111 assert(!RD->isUnion() && "Unexpected union."); 8112 for (const auto *Field : RD->fields()) { 8113 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 8114 // will fill in later.) 8115 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 8116 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 8117 RecordLayout[FieldIndex] = Field; 8118 } 8119 } 8120 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 8121 &Data : RecordLayout) { 8122 if (Data.isNull()) 8123 continue; 8124 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 8125 getPlainLayout(Base, Layout, /*AsBase=*/true); 8126 else 8127 Layout.push_back(Data.get<const FieldDecl *>()); 8128 } 8129 } 8130 8131 public: 8132 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 8133 : CurDir(&Dir), CGF(CGF) { 8134 // Extract firstprivate clause information. 8135 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 8136 for (const auto *D : C->varlists()) 8137 FirstPrivateDecls.try_emplace( 8138 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 8139 // Extract device pointer clause information. 8140 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 8141 for (auto L : C->component_lists()) 8142 DevPointersMap[L.first].push_back(L.second); 8143 } 8144 8145 /// Constructor for the declare mapper directive. 8146 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 8147 : CurDir(&Dir), CGF(CGF) {} 8148 8149 /// Generate code for the combined entry if we have a partially mapped struct 8150 /// and take care of the mapping flags of the arguments corresponding to 8151 /// individual struct members. 
8152 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 8153 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8154 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 8155 const StructRangeInfoTy &PartialStruct) const { 8156 // Base is the base of the struct 8157 BasePointers.push_back(PartialStruct.Base.getPointer()); 8158 // Pointer is the address of the lowest element 8159 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 8160 Pointers.push_back(LB); 8161 // Size is (addr of {highest+1} element) - (addr of lowest element) 8162 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 8163 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 8164 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 8165 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 8166 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 8167 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 8168 /*isSigned=*/false); 8169 Sizes.push_back(Size); 8170 // Map type is always TARGET_PARAM 8171 Types.push_back(OMP_MAP_TARGET_PARAM); 8172 // Remove TARGET_PARAM flag from the first element 8173 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 8174 8175 // All other current entries will be MEMBER_OF the combined entry 8176 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8177 // 0xFFFF in the MEMBER_OF field). 8178 OpenMPOffloadMappingFlags MemberOfFlag = 8179 getMemberOfFlag(BasePointers.size() - 1); 8180 for (auto &M : CurTypes) 8181 setCorrectMemberOfFlag(M, MemberOfFlag); 8182 } 8183 8184 /// Generate all the base pointers, section pointers, sizes and map 8185 /// types for the extracted mappable expressions. Also, for each item that 8186 /// relates with a device pointer, a pair of the relevant declaration and 8187 /// index where it occurs is appended to the device pointers info array. 
8188 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 8189 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8190 MapFlagsArrayTy &Types) const { 8191 // We have to process the component lists that relate with the same 8192 // declaration in a single chunk so that we can generate the map flags 8193 // correctly. Therefore, we organize all lists in a map. 8194 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8195 8196 // Helper function to fill the information map for the different supported 8197 // clauses. 8198 auto &&InfoGen = [&Info]( 8199 const ValueDecl *D, 8200 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8201 OpenMPMapClauseKind MapType, 8202 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8203 bool ReturnDevicePointer, bool IsImplicit) { 8204 const ValueDecl *VD = 8205 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8206 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8207 IsImplicit); 8208 }; 8209 8210 assert(CurDir.is<const OMPExecutableDirective *>() && 8211 "Expect a executable directive"); 8212 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8213 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 8214 for (const auto L : C->component_lists()) { 8215 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 8216 /*ReturnDevicePointer=*/false, C->isImplicit()); 8217 } 8218 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 8219 for (const auto L : C->component_lists()) { 8220 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 8221 /*ReturnDevicePointer=*/false, C->isImplicit()); 8222 } 8223 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 8224 for (const auto L : C->component_lists()) { 8225 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 8226 /*ReturnDevicePointer=*/false, C->isImplicit()); 8227 } 8228 8229 // Look at the use_device_ptr clause information and mark the existing map 8230 // 
entries as such. If there is no map information for an entry in the 8231 // use_device_ptr list, we create one with map type 'alloc' and zero size 8232 // section. It is the user fault if that was not mapped before. If there is 8233 // no map information and the pointer is a struct member, then we defer the 8234 // emission of that entry until the whole struct has been processed. 8235 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 8236 DeferredInfo; 8237 8238 for (const auto *C : 8239 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 8240 for (const auto L : C->component_lists()) { 8241 assert(!L.second.empty() && "Not expecting empty list of components!"); 8242 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 8243 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8244 const Expr *IE = L.second.back().getAssociatedExpression(); 8245 // If the first component is a member expression, we have to look into 8246 // 'this', which maps to null in the map of map information. Otherwise 8247 // look directly for the information. 8248 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8249 8250 // We potentially have map information for this declaration already. 8251 // Look for the first set of components that refer to it. 8252 if (It != Info.end()) { 8253 auto CI = std::find_if( 8254 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 8255 return MI.Components.back().getAssociatedDeclaration() == VD; 8256 }); 8257 // If we found a map entry, signal that the pointer has to be returned 8258 // and move on to the next declaration. 8259 if (CI != It->second.end()) { 8260 CI->ReturnDevicePointer = true; 8261 continue; 8262 } 8263 } 8264 8265 // We didn't find any match in our map information - generate a zero 8266 // size array section - if the pointer is a struct member we defer this 8267 // action until the whole struct has been processed. 
8268 if (isa<MemberExpr>(IE)) { 8269 // Insert the pointer into Info to be processed by 8270 // generateInfoForComponentList. Because it is a member pointer 8271 // without a pointee, no entry will be generated for it, therefore 8272 // we need to generate one after the whole struct has been processed. 8273 // Nonetheless, generateInfoForComponentList must be called to take 8274 // the pointer into account for the calculation of the range of the 8275 // partial struct. 8276 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8277 /*ReturnDevicePointer=*/false, C->isImplicit()); 8278 DeferredInfo[nullptr].emplace_back(IE, VD); 8279 } else { 8280 llvm::Value *Ptr = 8281 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8282 BasePointers.emplace_back(Ptr, VD); 8283 Pointers.push_back(Ptr); 8284 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8285 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8286 } 8287 } 8288 } 8289 8290 for (const auto &M : Info) { 8291 // We need to know when we generate information for the first component 8292 // associated with a capture, because the mapping flags depend on it. 8293 bool IsFirstComponentList = true; 8294 8295 // Temporary versions of arrays 8296 MapBaseValuesArrayTy CurBasePointers; 8297 MapValuesArrayTy CurPointers; 8298 MapValuesArrayTy CurSizes; 8299 MapFlagsArrayTy CurTypes; 8300 StructRangeInfoTy PartialStruct; 8301 8302 for (const MapInfo &L : M.second) { 8303 assert(!L.Components.empty() && 8304 "Not expecting declaration with no component lists."); 8305 8306 // Remember the current base pointer index. 8307 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8308 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8309 CurBasePointers, CurPointers, CurSizes, 8310 CurTypes, PartialStruct, 8311 IsFirstComponentList, L.IsImplicit); 8312 8313 // If this entry relates with a device pointer, set the relevant 8314 // declaration and add the 'return pointer' flag. 
8315 if (L.ReturnDevicePointer) { 8316 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8317 "Unexpected number of mapped base pointers."); 8318 8319 const ValueDecl *RelevantVD = 8320 L.Components.back().getAssociatedDeclaration(); 8321 assert(RelevantVD && 8322 "No relevant declaration related with device pointer??"); 8323 8324 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8325 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8326 } 8327 IsFirstComponentList = false; 8328 } 8329 8330 // Append any pending zero-length pointers which are struct members and 8331 // used with use_device_ptr. 8332 auto CI = DeferredInfo.find(M.first); 8333 if (CI != DeferredInfo.end()) { 8334 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8335 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8336 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8337 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8338 CurBasePointers.emplace_back(BasePtr, L.VD); 8339 CurPointers.push_back(Ptr); 8340 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8341 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8342 // value MEMBER_OF=FFFF so that the entry is later updated with the 8343 // correct value of MEMBER_OF. 8344 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8345 OMP_MAP_MEMBER_OF); 8346 } 8347 } 8348 8349 // If there is an entry in PartialStruct it means we have a struct with 8350 // individual members mapped. Emit an extra combined entry. 8351 if (PartialStruct.Base.isValid()) 8352 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8353 PartialStruct); 8354 8355 // We need to append the results of this capture to what we already have. 
8356 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8357 Pointers.append(CurPointers.begin(), CurPointers.end()); 8358 Sizes.append(CurSizes.begin(), CurSizes.end()); 8359 Types.append(CurTypes.begin(), CurTypes.end()); 8360 } 8361 } 8362 8363 /// Generate all the base pointers, section pointers, sizes and map types for 8364 /// the extracted map clauses of user-defined mapper. 8365 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8366 MapValuesArrayTy &Pointers, 8367 MapValuesArrayTy &Sizes, 8368 MapFlagsArrayTy &Types) const { 8369 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8370 "Expect a declare mapper directive"); 8371 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8372 // We have to process the component lists that relate with the same 8373 // declaration in a single chunk so that we can generate the map flags 8374 // correctly. Therefore, we organize all lists in a map. 8375 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8376 8377 // Helper function to fill the information map for the different supported 8378 // clauses. 8379 auto &&InfoGen = [&Info]( 8380 const ValueDecl *D, 8381 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8382 OpenMPMapClauseKind MapType, 8383 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8384 bool ReturnDevicePointer, bool IsImplicit) { 8385 const ValueDecl *VD = 8386 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8387 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8388 IsImplicit); 8389 }; 8390 8391 for (const auto *C : CurMapperDir->clauselists()) { 8392 const auto *MC = cast<OMPMapClause>(C); 8393 for (const auto L : MC->component_lists()) { 8394 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8395 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8396 } 8397 } 8398 8399 for (const auto &M : Info) { 8400 // We need to know when we generate information for the first component 8401 // associated with a capture, because the mapping flags depend on it. 8402 bool IsFirstComponentList = true; 8403 8404 // Temporary versions of arrays 8405 MapBaseValuesArrayTy CurBasePointers; 8406 MapValuesArrayTy CurPointers; 8407 MapValuesArrayTy CurSizes; 8408 MapFlagsArrayTy CurTypes; 8409 StructRangeInfoTy PartialStruct; 8410 8411 for (const MapInfo &L : M.second) { 8412 assert(!L.Components.empty() && 8413 "Not expecting declaration with no component lists."); 8414 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8415 CurBasePointers, CurPointers, CurSizes, 8416 CurTypes, PartialStruct, 8417 IsFirstComponentList, L.IsImplicit); 8418 IsFirstComponentList = false; 8419 } 8420 8421 // If there is an entry in PartialStruct it means we have a struct with 8422 // individual members mapped. Emit an extra combined entry. 8423 if (PartialStruct.Base.isValid()) 8424 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8425 PartialStruct); 8426 8427 // We need to append the results of this capture to what we already have. 8428 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8429 Pointers.append(CurPointers.begin(), CurPointers.end()); 8430 Sizes.append(CurSizes.begin(), CurSizes.end()); 8431 Types.append(CurTypes.begin(), CurTypes.end()); 8432 } 8433 } 8434 8435 /// Emit capture info for lambdas for variables captured by reference. 
8436 void generateInfoForLambdaCaptures( 8437 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8438 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8439 MapFlagsArrayTy &Types, 8440 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8441 const auto *RD = VD->getType() 8442 .getCanonicalType() 8443 .getNonReferenceType() 8444 ->getAsCXXRecordDecl(); 8445 if (!RD || !RD->isLambda()) 8446 return; 8447 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8448 LValue VDLVal = CGF.MakeAddrLValue( 8449 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8450 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8451 FieldDecl *ThisCapture = nullptr; 8452 RD->getCaptureFields(Captures, ThisCapture); 8453 if (ThisCapture) { 8454 LValue ThisLVal = 8455 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8456 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8457 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8458 VDLVal.getPointer(CGF)); 8459 BasePointers.push_back(ThisLVal.getPointer(CGF)); 8460 Pointers.push_back(ThisLValVal.getPointer(CGF)); 8461 Sizes.push_back( 8462 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8463 CGF.Int64Ty, /*isSigned=*/true)); 8464 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8465 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8466 } 8467 for (const LambdaCapture &LC : RD->captures()) { 8468 if (!LC.capturesVariable()) 8469 continue; 8470 const VarDecl *VD = LC.getCapturedVar(); 8471 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8472 continue; 8473 auto It = Captures.find(VD); 8474 assert(It != Captures.end() && "Found lambda capture without field."); 8475 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8476 if (LC.getCaptureKind() == LCK_ByRef) { 8477 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8478 
LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8479 VDLVal.getPointer(CGF)); 8480 BasePointers.push_back(VarLVal.getPointer(CGF)); 8481 Pointers.push_back(VarLValVal.getPointer(CGF)); 8482 Sizes.push_back(CGF.Builder.CreateIntCast( 8483 CGF.getTypeSize( 8484 VD->getType().getCanonicalType().getNonReferenceType()), 8485 CGF.Int64Ty, /*isSigned=*/true)); 8486 } else { 8487 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8488 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8489 VDLVal.getPointer(CGF)); 8490 BasePointers.push_back(VarLVal.getPointer(CGF)); 8491 Pointers.push_back(VarRVal.getScalarVal()); 8492 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8493 } 8494 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8495 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8496 } 8497 } 8498 8499 /// Set correct indices for lambdas captures. 8500 void adjustMemberOfForLambdaCaptures( 8501 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8502 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8503 MapFlagsArrayTy &Types) const { 8504 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8505 // Set correct member_of idx for all implicit lambda captures. 8506 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8507 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8508 continue; 8509 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8510 assert(BasePtr && "Unable to find base lambda address."); 8511 int TgtIdx = -1; 8512 for (unsigned J = I; J > 0; --J) { 8513 unsigned Idx = J - 1; 8514 if (Pointers[Idx] != BasePtr) 8515 continue; 8516 TgtIdx = Idx; 8517 break; 8518 } 8519 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8520 // All other current entries will be MEMBER_OF the combined entry 8521 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8522 // 0xFFFF in the MEMBER_OF field). 
8523 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8524 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8525 } 8526 } 8527 8528 /// Generate the base pointers, section pointers, sizes and map types 8529 /// associated to a given capture. 8530 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8531 llvm::Value *Arg, 8532 MapBaseValuesArrayTy &BasePointers, 8533 MapValuesArrayTy &Pointers, 8534 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8535 StructRangeInfoTy &PartialStruct) const { 8536 assert(!Cap->capturesVariableArrayType() && 8537 "Not expecting to generate map info for a variable array type!"); 8538 8539 // We need to know when we generating information for the first component 8540 const ValueDecl *VD = Cap->capturesThis() 8541 ? nullptr 8542 : Cap->getCapturedVar()->getCanonicalDecl(); 8543 8544 // If this declaration appears in a is_device_ptr clause we just have to 8545 // pass the pointer by value. If it is a reference to a declaration, we just 8546 // pass its value. 
8547 if (DevPointersMap.count(VD)) { 8548 BasePointers.emplace_back(Arg, VD); 8549 Pointers.push_back(Arg); 8550 Sizes.push_back( 8551 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8552 CGF.Int64Ty, /*isSigned=*/true)); 8553 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8554 return; 8555 } 8556 8557 using MapData = 8558 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8559 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8560 SmallVector<MapData, 4> DeclComponentLists; 8561 assert(CurDir.is<const OMPExecutableDirective *>() && 8562 "Expect a executable directive"); 8563 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8564 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8565 for (const auto L : C->decl_component_lists(VD)) { 8566 assert(L.first == VD && 8567 "We got information for the wrong declaration??"); 8568 assert(!L.second.empty() && 8569 "Not expecting declaration with no component lists."); 8570 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8571 C->getMapTypeModifiers(), 8572 C->isImplicit()); 8573 } 8574 } 8575 8576 // Find overlapping elements (including the offset from the base element). 
8577 llvm::SmallDenseMap< 8578 const MapData *, 8579 llvm::SmallVector< 8580 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8581 4> 8582 OverlappedData; 8583 size_t Count = 0; 8584 for (const MapData &L : DeclComponentLists) { 8585 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8586 OpenMPMapClauseKind MapType; 8587 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8588 bool IsImplicit; 8589 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8590 ++Count; 8591 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8592 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8593 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8594 auto CI = Components.rbegin(); 8595 auto CE = Components.rend(); 8596 auto SI = Components1.rbegin(); 8597 auto SE = Components1.rend(); 8598 for (; CI != CE && SI != SE; ++CI, ++SI) { 8599 if (CI->getAssociatedExpression()->getStmtClass() != 8600 SI->getAssociatedExpression()->getStmtClass()) 8601 break; 8602 // Are we dealing with different variables/fields? 8603 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8604 break; 8605 } 8606 // Found overlapping if, at least for one component, reached the head of 8607 // the components list. 8608 if (CI == CE || SI == SE) { 8609 assert((CI != CE || SI != SE) && 8610 "Unexpected full match of the mapping components."); 8611 const MapData &BaseData = CI == CE ? L : L1; 8612 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8613 SI == SE ? Components : Components1; 8614 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8615 OverlappedElements.getSecond().push_back(SubData); 8616 } 8617 } 8618 } 8619 // Sort the overlapped elements for each item. 
8620 llvm::SmallVector<const FieldDecl *, 4> Layout; 8621 if (!OverlappedData.empty()) { 8622 if (const auto *CRD = 8623 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8624 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8625 else { 8626 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8627 Layout.append(RD->field_begin(), RD->field_end()); 8628 } 8629 } 8630 for (auto &Pair : OverlappedData) { 8631 llvm::sort( 8632 Pair.getSecond(), 8633 [&Layout]( 8634 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8635 OMPClauseMappableExprCommon::MappableExprComponentListRef 8636 Second) { 8637 auto CI = First.rbegin(); 8638 auto CE = First.rend(); 8639 auto SI = Second.rbegin(); 8640 auto SE = Second.rend(); 8641 for (; CI != CE && SI != SE; ++CI, ++SI) { 8642 if (CI->getAssociatedExpression()->getStmtClass() != 8643 SI->getAssociatedExpression()->getStmtClass()) 8644 break; 8645 // Are we dealing with different variables/fields? 8646 if (CI->getAssociatedDeclaration() != 8647 SI->getAssociatedDeclaration()) 8648 break; 8649 } 8650 8651 // Lists contain the same elements. 8652 if (CI == CE && SI == SE) 8653 return false; 8654 8655 // List with less elements is less than list with more elements. 8656 if (CI == CE || SI == SE) 8657 return CI == CE; 8658 8659 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8660 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8661 if (FD1->getParent() == FD2->getParent()) 8662 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8663 const auto It = 8664 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8665 return FD == FD1 || FD == FD2; 8666 }); 8667 return *It == FD1; 8668 }); 8669 } 8670 8671 // Associated with a capture, because the mapping flags depend on it. 8672 // Go through all of the elements with the overlapped elements. 
8673 for (const auto &Pair : OverlappedData) { 8674 const MapData &L = *Pair.getFirst(); 8675 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8676 OpenMPMapClauseKind MapType; 8677 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8678 bool IsImplicit; 8679 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8680 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8681 OverlappedComponents = Pair.getSecond(); 8682 bool IsFirstComponentList = true; 8683 generateInfoForComponentList(MapType, MapModifiers, Components, 8684 BasePointers, Pointers, Sizes, Types, 8685 PartialStruct, IsFirstComponentList, 8686 IsImplicit, OverlappedComponents); 8687 } 8688 // Go through other elements without overlapped elements. 8689 bool IsFirstComponentList = OverlappedData.empty(); 8690 for (const MapData &L : DeclComponentLists) { 8691 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8692 OpenMPMapClauseKind MapType; 8693 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8694 bool IsImplicit; 8695 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8696 auto It = OverlappedData.find(&L); 8697 if (It == OverlappedData.end()) 8698 generateInfoForComponentList(MapType, MapModifiers, Components, 8699 BasePointers, Pointers, Sizes, Types, 8700 PartialStruct, IsFirstComponentList, 8701 IsImplicit); 8702 IsFirstComponentList = false; 8703 } 8704 } 8705 8706 /// Generate the base pointers, section pointers, sizes and map types 8707 /// associated with the declare target link variables. 
8708 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8709 MapValuesArrayTy &Pointers, 8710 MapValuesArrayTy &Sizes, 8711 MapFlagsArrayTy &Types) const { 8712 assert(CurDir.is<const OMPExecutableDirective *>() && 8713 "Expect a executable directive"); 8714 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8715 // Map other list items in the map clause which are not captured variables 8716 // but "declare target link" global variables. 8717 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8718 for (const auto L : C->component_lists()) { 8719 if (!L.first) 8720 continue; 8721 const auto *VD = dyn_cast<VarDecl>(L.first); 8722 if (!VD) 8723 continue; 8724 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8725 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8726 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8727 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8728 continue; 8729 StructRangeInfoTy PartialStruct; 8730 generateInfoForComponentList( 8731 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8732 Pointers, Sizes, Types, PartialStruct, 8733 /*IsFirstComponentList=*/true, C->isImplicit()); 8734 assert(!PartialStruct.Base.isValid() && 8735 "No partial structs for declare target link expected."); 8736 } 8737 } 8738 } 8739 8740 /// Generate the default map information for a given capture \a CI, 8741 /// record field declaration \a RI and captured value \a CV. 8742 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8743 const FieldDecl &RI, llvm::Value *CV, 8744 MapBaseValuesArrayTy &CurBasePointers, 8745 MapValuesArrayTy &CurPointers, 8746 MapValuesArrayTy &CurSizes, 8747 MapFlagsArrayTy &CurMapTypes) const { 8748 bool IsImplicit = true; 8749 // Do the default mapping. 
8750 if (CI.capturesThis()) { 8751 CurBasePointers.push_back(CV); 8752 CurPointers.push_back(CV); 8753 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8754 CurSizes.push_back( 8755 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8756 CGF.Int64Ty, /*isSigned=*/true)); 8757 // Default map type. 8758 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8759 } else if (CI.capturesVariableByCopy()) { 8760 CurBasePointers.push_back(CV); 8761 CurPointers.push_back(CV); 8762 if (!RI.getType()->isAnyPointerType()) { 8763 // We have to signal to the runtime captures passed by value that are 8764 // not pointers. 8765 CurMapTypes.push_back(OMP_MAP_LITERAL); 8766 CurSizes.push_back(CGF.Builder.CreateIntCast( 8767 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8768 } else { 8769 // Pointers are implicitly mapped with a zero size and no flags 8770 // (other than first map that is added for all implicit maps). 8771 CurMapTypes.push_back(OMP_MAP_NONE); 8772 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8773 } 8774 const VarDecl *VD = CI.getCapturedVar(); 8775 auto I = FirstPrivateDecls.find(VD); 8776 if (I != FirstPrivateDecls.end()) 8777 IsImplicit = I->getSecond(); 8778 } else { 8779 assert(CI.capturesVariable() && "Expected captured reference."); 8780 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8781 QualType ElementType = PtrTy->getPointeeType(); 8782 CurSizes.push_back(CGF.Builder.CreateIntCast( 8783 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8784 // The default map type for a scalar/complex type is 'to' because by 8785 // default the value doesn't have to be retrieved. For an aggregate 8786 // type, the default is 'tofrom'. 
8787 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8788 const VarDecl *VD = CI.getCapturedVar(); 8789 auto I = FirstPrivateDecls.find(VD); 8790 if (I != FirstPrivateDecls.end() && 8791 VD->getType().isConstant(CGF.getContext())) { 8792 llvm::Constant *Addr = 8793 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8794 // Copy the value of the original variable to the new global copy. 8795 CGF.Builder.CreateMemCpy( 8796 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8797 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8798 CurSizes.back(), /*IsVolatile=*/false); 8799 // Use new global variable as the base pointers. 8800 CurBasePointers.push_back(Addr); 8801 CurPointers.push_back(Addr); 8802 } else { 8803 CurBasePointers.push_back(CV); 8804 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8805 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8806 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8807 AlignmentSource::Decl)); 8808 CurPointers.push_back(PtrAddr.getPointer()); 8809 } else { 8810 CurPointers.push_back(CV); 8811 } 8812 } 8813 if (I != FirstPrivateDecls.end()) 8814 IsImplicit = I->getSecond(); 8815 } 8816 // Every default map produces a single argument which is a target parameter. 8817 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8818 8819 // Add flag stating this is an implicit map. 8820 if (IsImplicit) 8821 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8822 } 8823 }; 8824 } // anonymous namespace 8825 8826 /// Emit the arrays used to pass the captures and map information to the 8827 /// offloading runtime library. If there is no map or capture information, 8828 /// return nullptr by reference. 
8829 static void 8830 emitOffloadingArrays(CodeGenFunction &CGF, 8831 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8832 MappableExprsHandler::MapValuesArrayTy &Pointers, 8833 MappableExprsHandler::MapValuesArrayTy &Sizes, 8834 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8835 CGOpenMPRuntime::TargetDataInfo &Info) { 8836 CodeGenModule &CGM = CGF.CGM; 8837 ASTContext &Ctx = CGF.getContext(); 8838 8839 // Reset the array information. 8840 Info.clearArrayInfo(); 8841 Info.NumberOfPtrs = BasePointers.size(); 8842 8843 if (Info.NumberOfPtrs) { 8844 // Detect if we have any capture size requiring runtime evaluation of the 8845 // size so that a constant array could be eventually used. 8846 bool hasRuntimeEvaluationCaptureSize = false; 8847 for (llvm::Value *S : Sizes) 8848 if (!isa<llvm::Constant>(S)) { 8849 hasRuntimeEvaluationCaptureSize = true; 8850 break; 8851 } 8852 8853 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8854 QualType PointerArrayType = Ctx.getConstantArrayType( 8855 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8856 /*IndexTypeQuals=*/0); 8857 8858 Info.BasePointersArray = 8859 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8860 Info.PointersArray = 8861 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8862 8863 // If we don't have any VLA types or other types that require runtime 8864 // evaluation, we can use a constant array for the map sizes, otherwise we 8865 // need to fill up the arrays as we do for the pointers. 
8866 QualType Int64Ty = 8867 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8868 if (hasRuntimeEvaluationCaptureSize) { 8869 QualType SizeArrayType = Ctx.getConstantArrayType( 8870 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8871 /*IndexTypeQuals=*/0); 8872 Info.SizesArray = 8873 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8874 } else { 8875 // We expect all the sizes to be constant, so we collect them to create 8876 // a constant array. 8877 SmallVector<llvm::Constant *, 16> ConstSizes; 8878 for (llvm::Value *S : Sizes) 8879 ConstSizes.push_back(cast<llvm::Constant>(S)); 8880 8881 auto *SizesArrayInit = llvm::ConstantArray::get( 8882 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8883 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8884 auto *SizesArrayGbl = new llvm::GlobalVariable( 8885 CGM.getModule(), SizesArrayInit->getType(), 8886 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8887 SizesArrayInit, Name); 8888 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8889 Info.SizesArray = SizesArrayGbl; 8890 } 8891 8892 // The map types are always constant so we don't need to generate code to 8893 // fill arrays. Instead, we create an array constant. 
8894 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8895 llvm::copy(MapTypes, Mapping.begin()); 8896 llvm::Constant *MapTypesArrayInit = 8897 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8898 std::string MaptypesName = 8899 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8900 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8901 CGM.getModule(), MapTypesArrayInit->getType(), 8902 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8903 MapTypesArrayInit, MaptypesName); 8904 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8905 Info.MapTypesArray = MapTypesArrayGbl; 8906 8907 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8908 llvm::Value *BPVal = *BasePointers[I]; 8909 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8910 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8911 Info.BasePointersArray, 0, I); 8912 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8913 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8914 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8915 CGF.Builder.CreateStore(BPVal, BPAddr); 8916 8917 if (Info.requiresDevicePointerInfo()) 8918 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8919 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8920 8921 llvm::Value *PVal = Pointers[I]; 8922 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8923 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8924 Info.PointersArray, 0, I); 8925 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8926 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8927 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8928 CGF.Builder.CreateStore(PVal, PAddr); 8929 8930 if (hasRuntimeEvaluationCaptureSize) { 8931 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8932 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8933 Info.SizesArray, 8934 /*Idx0=*/0, 8935 /*Idx1=*/I); 8936 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8937 CGF.Builder.CreateStore( 8938 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8939 SAddr); 8940 } 8941 } 8942 } 8943 } 8944 8945 /// Emit the arguments to be passed to the runtime library based on the 8946 /// arrays of pointers, sizes and map types. 8947 static void emitOffloadingArraysArgument( 8948 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8949 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8950 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8951 CodeGenModule &CGM = CGF.CGM; 8952 if (Info.NumberOfPtrs) { 8953 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8954 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8955 Info.BasePointersArray, 8956 /*Idx0=*/0, /*Idx1=*/0); 8957 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8958 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8959 Info.PointersArray, 8960 /*Idx0=*/0, 8961 /*Idx1=*/0); 8962 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8963 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8964 /*Idx0=*/0, /*Idx1=*/0); 8965 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8966 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8967 Info.MapTypesArray, 8968 /*Idx0=*/0, 8969 /*Idx1=*/0); 8970 } else { 8971 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8972 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8973 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8974 MapTypesArrayArg = 8975 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8976 } 8977 } 8978 8979 /// Check for inner distribute directive. 
8980 static const OMPExecutableDirective * 8981 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8982 const auto *CS = D.getInnermostCapturedStmt(); 8983 const auto *Body = 8984 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8985 const Stmt *ChildStmt = 8986 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8987 8988 if (const auto *NestedDir = 8989 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8990 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8991 switch (D.getDirectiveKind()) { 8992 case OMPD_target: 8993 if (isOpenMPDistributeDirective(DKind)) 8994 return NestedDir; 8995 if (DKind == OMPD_teams) { 8996 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8997 /*IgnoreCaptured=*/true); 8998 if (!Body) 8999 return nullptr; 9000 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9001 if (const auto *NND = 9002 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9003 DKind = NND->getDirectiveKind(); 9004 if (isOpenMPDistributeDirective(DKind)) 9005 return NND; 9006 } 9007 } 9008 return nullptr; 9009 case OMPD_target_teams: 9010 if (isOpenMPDistributeDirective(DKind)) 9011 return NestedDir; 9012 return nullptr; 9013 case OMPD_target_parallel: 9014 case OMPD_target_simd: 9015 case OMPD_target_parallel_for: 9016 case OMPD_target_parallel_for_simd: 9017 return nullptr; 9018 case OMPD_target_teams_distribute: 9019 case OMPD_target_teams_distribute_simd: 9020 case OMPD_target_teams_distribute_parallel_for: 9021 case OMPD_target_teams_distribute_parallel_for_simd: 9022 case OMPD_parallel: 9023 case OMPD_for: 9024 case OMPD_parallel_for: 9025 case OMPD_parallel_master: 9026 case OMPD_parallel_sections: 9027 case OMPD_for_simd: 9028 case OMPD_parallel_for_simd: 9029 case OMPD_cancel: 9030 case OMPD_cancellation_point: 9031 case OMPD_ordered: 9032 case OMPD_threadprivate: 9033 case OMPD_allocate: 9034 case OMPD_task: 9035 case OMPD_simd: 9036 case OMPD_sections: 9037 
case OMPD_section: 9038 case OMPD_single: 9039 case OMPD_master: 9040 case OMPD_critical: 9041 case OMPD_taskyield: 9042 case OMPD_barrier: 9043 case OMPD_taskwait: 9044 case OMPD_taskgroup: 9045 case OMPD_atomic: 9046 case OMPD_flush: 9047 case OMPD_depobj: 9048 case OMPD_teams: 9049 case OMPD_target_data: 9050 case OMPD_target_exit_data: 9051 case OMPD_target_enter_data: 9052 case OMPD_distribute: 9053 case OMPD_distribute_simd: 9054 case OMPD_distribute_parallel_for: 9055 case OMPD_distribute_parallel_for_simd: 9056 case OMPD_teams_distribute: 9057 case OMPD_teams_distribute_simd: 9058 case OMPD_teams_distribute_parallel_for: 9059 case OMPD_teams_distribute_parallel_for_simd: 9060 case OMPD_target_update: 9061 case OMPD_declare_simd: 9062 case OMPD_declare_variant: 9063 case OMPD_declare_target: 9064 case OMPD_end_declare_target: 9065 case OMPD_declare_reduction: 9066 case OMPD_declare_mapper: 9067 case OMPD_taskloop: 9068 case OMPD_taskloop_simd: 9069 case OMPD_master_taskloop: 9070 case OMPD_master_taskloop_simd: 9071 case OMPD_parallel_master_taskloop: 9072 case OMPD_parallel_master_taskloop_simd: 9073 case OMPD_requires: 9074 case OMPD_unknown: 9075 llvm_unreachable("Unexpected directive."); 9076 } 9077 } 9078 9079 return nullptr; 9080 } 9081 9082 /// Emit the user-defined mapper function. The code generation follows the 9083 /// pattern in the example below. 9084 /// \code 9085 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9086 /// void *base, void *begin, 9087 /// int64_t size, int64_t type) { 9088 /// // Allocate space for an array section first. 9089 /// if (size > 1 && !maptype.IsDelete) 9090 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9091 /// size*sizeof(Ty), clearToFrom(type)); 9092 /// // Map members. 
9093 /// for (unsigned i = 0; i < size; i++) { 9094 /// // For each component specified by this mapper: 9095 /// for (auto c : all_components) { 9096 /// if (c.hasMapper()) 9097 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 9098 /// c.arg_type); 9099 /// else 9100 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 9101 /// c.arg_begin, c.arg_size, c.arg_type); 9102 /// } 9103 /// } 9104 /// // Delete the array section. 9105 /// if (size > 1 && maptype.IsDelete) 9106 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9107 /// size*sizeof(Ty), clearToFrom(type)); 9108 /// } 9109 /// \endcode 9110 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 9111 CodeGenFunction *CGF) { 9112 if (UDMMap.count(D) > 0) 9113 return; 9114 ASTContext &C = CGM.getContext(); 9115 QualType Ty = D->getType(); 9116 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 9117 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 9118 auto *MapperVarDecl = 9119 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 9120 SourceLocation Loc = D->getLocation(); 9121 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 9122 9123 // Prepare mapper function arguments and attributes. 
9124 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9125 C.VoidPtrTy, ImplicitParamDecl::Other); 9126 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 9127 ImplicitParamDecl::Other); 9128 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 9129 C.VoidPtrTy, ImplicitParamDecl::Other); 9130 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9131 ImplicitParamDecl::Other); 9132 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 9133 ImplicitParamDecl::Other); 9134 FunctionArgList Args; 9135 Args.push_back(&HandleArg); 9136 Args.push_back(&BaseArg); 9137 Args.push_back(&BeginArg); 9138 Args.push_back(&SizeArg); 9139 Args.push_back(&TypeArg); 9140 const CGFunctionInfo &FnInfo = 9141 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 9142 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 9143 SmallString<64> TyStr; 9144 llvm::raw_svector_ostream Out(TyStr); 9145 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 9146 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 9147 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 9148 Name, &CGM.getModule()); 9149 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 9150 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 9151 // Start the mapper function code generation. 9152 CodeGenFunction MapperCGF(CGM); 9153 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 9154 // Compute the starting and end addreses of array elements. 
9155 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 9156 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 9157 C.getPointerType(Int64Ty), Loc); 9158 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 9159 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 9160 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 9161 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 9162 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 9163 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 9164 C.getPointerType(Int64Ty), Loc); 9165 // Prepare common arguments for array initiation and deletion. 9166 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 9167 MapperCGF.GetAddrOfLocalVar(&HandleArg), 9168 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9169 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 9170 MapperCGF.GetAddrOfLocalVar(&BaseArg), 9171 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9172 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 9173 MapperCGF.GetAddrOfLocalVar(&BeginArg), 9174 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 9175 9176 // Emit array initiation if this is an array section and \p MapType indicates 9177 // that memory allocation is required. 9178 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 9179 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9180 ElementSize, HeadBB, /*IsInit=*/true); 9181 9182 // Emit a for loop to iterate through SizeArg of elements and map all of them. 9183 9184 // Emit the loop header block. 9185 MapperCGF.EmitBlock(HeadBB); 9186 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 9187 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 9188 // Evaluate whether the initial condition is satisfied. 
9189 llvm::Value *IsEmpty = 9190 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 9191 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 9192 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 9193 9194 // Emit the loop body block. 9195 MapperCGF.EmitBlock(BodyBB); 9196 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 9197 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 9198 PtrPHI->addIncoming(PtrBegin, EntryBB); 9199 Address PtrCurrent = 9200 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 9201 .getAlignment() 9202 .alignmentOfArrayElement(ElementSize)); 9203 // Privatize the declared variable of mapper to be the current array element. 9204 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 9205 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 9206 return MapperCGF 9207 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 9208 .getAddress(MapperCGF); 9209 }); 9210 (void)Scope.Privatize(); 9211 9212 // Get map clause information. Fill up the arrays with all mapped variables. 9213 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9214 MappableExprsHandler::MapValuesArrayTy Pointers; 9215 MappableExprsHandler::MapValuesArrayTy Sizes; 9216 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9217 MappableExprsHandler MEHandler(*D, MapperCGF); 9218 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 9219 9220 // Call the runtime API __tgt_mapper_num_components to get the number of 9221 // pre-existing components. 9222 llvm::Value *OffloadingArgs[] = {Handle}; 9223 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9224 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 9225 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9226 PreviousSize, 9227 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9228 9229 // Fill up the runtime mapper handle for all components. 
9230 for (unsigned I = 0; I < BasePointers.size(); ++I) { 9231 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9232 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9233 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9234 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9235 llvm::Value *CurSizeArg = Sizes[I]; 9236 9237 // Extract the MEMBER_OF field from the map type. 9238 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9239 MapperCGF.EmitBlock(MemberBB); 9240 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 9241 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9242 OriMapType, 9243 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9244 llvm::BasicBlock *MemberCombineBB = 9245 MapperCGF.createBasicBlock("omp.member.combine"); 9246 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9247 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9248 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9249 // Add the number of pre-existing components to the MEMBER_OF field if it 9250 // is valid. 9251 MapperCGF.EmitBlock(MemberCombineBB); 9252 llvm::Value *CombinedMember = 9253 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9254 // Do nothing if it is not a member of previous components. 9255 MapperCGF.EmitBlock(TypeBB); 9256 llvm::PHINode *MemberMapType = 9257 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9258 MemberMapType->addIncoming(OriMapType, MemberBB); 9259 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9260 9261 // Combine the map type inherited from user-defined mapper with that 9262 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9263 // bits of the \a MapType, which is the input argument of the mapper 9264 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9265 // bits of MemberMapType. 
9266 // [OpenMP 5.0], 1.2.6. map-type decay. 9267 // | alloc | to | from | tofrom | release | delete 9268 // ---------------------------------------------------------- 9269 // alloc | alloc | alloc | alloc | alloc | release | delete 9270 // to | alloc | to | alloc | to | release | delete 9271 // from | alloc | alloc | from | from | release | delete 9272 // tofrom | alloc | to | from | tofrom | release | delete 9273 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9274 MapType, 9275 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9276 MappableExprsHandler::OMP_MAP_FROM)); 9277 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9278 llvm::BasicBlock *AllocElseBB = 9279 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9280 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9281 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9282 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9283 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9284 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9285 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9286 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9287 MapperCGF.EmitBlock(AllocBB); 9288 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9289 MemberMapType, 9290 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9291 MappableExprsHandler::OMP_MAP_FROM))); 9292 MapperCGF.Builder.CreateBr(EndBB); 9293 MapperCGF.EmitBlock(AllocElseBB); 9294 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9295 LeftToFrom, 9296 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9297 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9298 // In case of to, clear OMP_MAP_FROM. 
9299 MapperCGF.EmitBlock(ToBB); 9300 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9301 MemberMapType, 9302 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9303 MapperCGF.Builder.CreateBr(EndBB); 9304 MapperCGF.EmitBlock(ToElseBB); 9305 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9306 LeftToFrom, 9307 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9308 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9309 // In case of from, clear OMP_MAP_TO. 9310 MapperCGF.EmitBlock(FromBB); 9311 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9312 MemberMapType, 9313 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9314 // In case of tofrom, do nothing. 9315 MapperCGF.EmitBlock(EndBB); 9316 llvm::PHINode *CurMapType = 9317 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9318 CurMapType->addIncoming(AllocMapType, AllocBB); 9319 CurMapType->addIncoming(ToMapType, ToBB); 9320 CurMapType->addIncoming(FromMapType, FromBB); 9321 CurMapType->addIncoming(MemberMapType, ToElseBB); 9322 9323 // TODO: call the corresponding mapper function if a user-defined mapper is 9324 // associated with this map clause. 9325 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9326 // data structure. 9327 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9328 CurSizeArg, CurMapType}; 9329 MapperCGF.EmitRuntimeCall( 9330 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9331 OffloadingArgs); 9332 } 9333 9334 // Update the pointer to point to the next element that needs to be mapped, 9335 // and check whether we have mapped all elements. 
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// \param MapperCGF Code generator for the mapper function being emitted.
/// \param Handle, Base, Begin Forwarded unchanged as the first three arguments
///        of the __tgt_push_mapper_component runtime call.
/// \param Size Number of elements; the code is only reached when it is >= 1
///        (signed compare), and it is scaled by \a ElementSize for the call.
/// \param MapType Map type bits; OMP_MAP_DELETE is tested here, and
///        OMP_MAP_TO / OMP_MAP_FROM are masked out before the runtime call.
/// \param ElementSize Size of one array element.
/// \param ExitBB Block branched to when no init/delete call is required.
/// \param IsInit Selects the initialization (true) or deletion (false) flavor.
///
/// On return the insertion point is still inside the block that performs the
/// runtime call; no terminator is emitted for it here.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  // Suffix used to tell the init and delete variants apart in the names of
  // the generated blocks and values.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // NOTE: the value name below says "arrayinit" for both the init and the
  // delete variant; only the block names use Prefix.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  // The init variant proceeds when the delete bit is clear, the delete variant
  // proceeds when it is set.
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
}

/// Emit a call to __kmpc_push_target_tripcount for the loop associated with
/// the target directive \a D, if there is one.
///
/// \a D itself is used when its kind is both a distribute and a teams
/// directive; otherwise a nested distribute directive is looked up. If none is
/// found, or if \a SizeEmitter returns null, nothing is emitted.
///
/// \param DeviceID First argument of the runtime call.
/// \param SizeEmitter Callback producing the iteration count value that is
///        passed as the second argument of the runtime call.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  // The call is emitted through emitInlinedDirective, i.e. as an inlined
  // region of unknown directive kind.
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *Args[] = {DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the host-side code that launches the target region of directive \a D.
///
/// When \a OutlinedFnID is non-null, code is emitted that fills the offloading
/// arrays and calls one of the __tgt_target* runtime entry points; if that
/// call returns a non-zero value, the host version \a OutlinedFn is called
/// instead. When \a OutlinedFnID is null only the host call is emitted. An
/// 'if' clause condition (\a IfCond) selects between both paths at run time.
///
/// \param Device Optional 'device' clause expression.
/// \param SizeEmitter Callback forwarded to emitTargetNumIterationsCall.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause means the target region is emitted through
  // EmitOMPTargetTaskBasedDirective further below.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  // InputInfo and MapTypesArray are captured by reference: TargetThenGen below
  // fills them in before it runs this lambda.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    // 'target teams'
    // 'target' / 'teams'
    // 'target teams distribute parallel for'
    // 'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      // Same arguments as above minus the team/thread counts.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-zero return value is treated as failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // With an outer task the captured variables are regenerated using the
    // CodeGenFunction this lambda is invoked with (presumably because the
    // values collected earlier were emitted in the enclosing function --
    // TODO confirm).
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  // This mirrors the fallback path of ThenGen without any offloading attempt.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Collects the map information for every capture, materializes the
  // offloading arrays and then runs ThenGen, either directly or wrapped in a
  // task when RequiresOuterTask is set.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // The captures, the fields of the captured record and the captured values
    // are walked in lock-step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the results to the variables ThenGen captured by reference.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only counterpart of TargetThenGen: runs ElseGen, wrapped in a task
  // (with an empty OMPTargetDataInfo) when RequiresOuterTask is set.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively walk statement \a S and emit the device function for every
/// target execution directive found in it.
///
/// \param S Statement to scan; a null statement is ignored.
/// \param ParentName Name passed on to the device-function emitters and used,
///        together with the directive location, to look up the target region
///        entry.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    // None of the remaining kinds is handled as a target execution directive
    // here; reaching any of them is treated as a bug.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // Any other executable directive: continue the scan inside the statement
  // captured by its innermost captured statement, if it has one.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

/// Decide how the function-like declaration \a GD is handled for offloading.
///
/// On the host this returns true only for functions whose declare target
/// device type is 'nohost'. On the device the function body is scanned for
/// target regions first; true is then returned for device_type(host)
/// functions, and for declarations that are neither declare target nor
/// recorded in AlreadyEmittedTargetDecls.
///
/// \return The comments in the body indicate that a true result means the
/// declaration is not emitted through the regular path (the caller is not
/// visible here).
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
        OMPDeclareTargetDeclAttr::getDeviceType(FD);
    // Do not emit device_type(host) functions for the device.
    if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

/// Device-side handling of the global variable \a GD: scan the constructors
/// and destructor of its record type for target regions and decide whether
/// emission of the variable is deferred. Always returns false on the host.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
9900 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9901 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9902 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9903 StringRef ParentName = 9904 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9905 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9906 } 9907 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9908 StringRef ParentName = 9909 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9910 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9911 } 9912 } 9913 9914 // Do not to emit variable if it is not marked as declare target. 9915 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9916 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9917 cast<VarDecl>(GD.getDecl())); 9918 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9919 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9920 HasRequiresUnifiedSharedMemory)) { 9921 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9922 return true; 9923 } 9924 return false; 9925 } 9926 9927 llvm::Constant * 9928 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9929 const VarDecl *VD) { 9930 assert(VD->getType().isConstant(CGM.getContext()) && 9931 "Expected constant variable."); 9932 StringRef VarName; 9933 llvm::Constant *Addr; 9934 llvm::GlobalValue::LinkageTypes Linkage; 9935 QualType Ty = VD->getType(); 9936 SmallString<128> Buffer; 9937 { 9938 unsigned DeviceID; 9939 unsigned FileID; 9940 unsigned Line; 9941 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9942 FileID, Line); 9943 llvm::raw_svector_ostream OS(Buffer); 9944 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9945 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9946 VarName = OS.str(); 9947 } 9948 Linkage = llvm::GlobalValue::InternalLinkage; 9949 Addr = 9950 
getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9951 getDefaultFirstprivateAddressSpace()); 9952 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9953 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9954 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9955 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9956 VarName, Addr, VarSize, 9957 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9958 return Addr; 9959 } 9960 9961 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9962 llvm::Constant *Addr) { 9963 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9964 !CGM.getLangOpts().OpenMPIsDevice) 9965 return; 9966 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9967 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9968 if (!Res) { 9969 if (CGM.getLangOpts().OpenMPIsDevice) { 9970 // Register non-target variables being emitted in device code (debug info 9971 // may cause this). 9972 StringRef VarName = CGM.getMangledName(VD); 9973 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9974 } 9975 return; 9976 } 9977 // Register declare target variables. 9978 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9979 StringRef VarName; 9980 CharUnits VarSize; 9981 llvm::GlobalValue::LinkageTypes Linkage; 9982 9983 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9984 !HasRequiresUnifiedSharedMemory) { 9985 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9986 VarName = CGM.getMangledName(VD); 9987 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9988 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9989 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9990 } else { 9991 VarSize = CharUnits::Zero(); 9992 } 9993 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9994 // Temp solution to prevent optimizations of the internal variables. 
9995 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9996 std::string RefName = getName({VarName, "ref"}); 9997 if (!CGM.GetGlobalValue(RefName)) { 9998 llvm::Constant *AddrRef = 9999 getOrCreateInternalVariable(Addr->getType(), RefName); 10000 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10001 GVAddrRef->setConstant(/*Val=*/true); 10002 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10003 GVAddrRef->setInitializer(Addr); 10004 CGM.addCompilerUsedGlobal(GVAddrRef); 10005 } 10006 } 10007 } else { 10008 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10009 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10010 HasRequiresUnifiedSharedMemory)) && 10011 "Declare target attribute must link or to with unified memory."); 10012 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10013 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10014 else 10015 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10016 10017 if (CGM.getLangOpts().OpenMPIsDevice) { 10018 VarName = Addr->getName(); 10019 Addr = nullptr; 10020 } else { 10021 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10022 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10023 } 10024 VarSize = CGM.getPointerSize(); 10025 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10026 } 10027 10028 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10029 VarName, Addr, VarSize, Flags, Linkage); 10030 } 10031 10032 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10033 if (isa<FunctionDecl>(GD.getDecl()) || 10034 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10035 return emitTargetFunctions(GD); 10036 10037 return emitTargetGlobalVariable(GD); 10038 } 10039 10040 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10041 for (const VarDecl *VD : DeferredGlobalVariables) { 10042 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10043 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10044 if (!Res) 10045 
continue; 10046 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10047 !HasRequiresUnifiedSharedMemory) { 10048 CGM.EmitGlobal(VD); 10049 } else { 10050 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10051 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10052 HasRequiresUnifiedSharedMemory)) && 10053 "Expected link clause or to clause with unified memory."); 10054 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10055 } 10056 } 10057 } 10058 10059 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10060 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10061 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10062 " Expected target-based directive."); 10063 } 10064 10065 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10066 for (const OMPClause *Clause : D->clauselists()) { 10067 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10068 HasRequiresUnifiedSharedMemory = true; 10069 } else if (const auto *AC = 10070 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10071 switch (AC->getAtomicDefaultMemOrderKind()) { 10072 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10073 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10074 break; 10075 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10076 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10077 break; 10078 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10079 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10080 break; 10081 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10082 break; 10083 } 10084 } 10085 } 10086 } 10087 10088 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10089 return RequiresAtomicOrdering; 10090 } 10091 10092 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10093 LangAS &AS) { 10094 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10095 return false; 10096 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10097 
switch(A->getAllocatorType()) { 10098 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10099 // Not supported, fallback to the default mem space. 10100 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10101 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10102 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10103 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10104 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10105 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10106 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10107 AS = LangAS::Default; 10108 return true; 10109 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10110 llvm_unreachable("Expected predefined allocator for the variables with the " 10111 "static storage."); 10112 } 10113 return false; 10114 } 10115 10116 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10117 return HasRequiresUnifiedSharedMemory; 10118 } 10119 10120 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10121 CodeGenModule &CGM) 10122 : CGM(CGM) { 10123 if (CGM.getLangOpts().OpenMPIsDevice) { 10124 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10125 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10126 } 10127 } 10128 10129 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10130 if (CGM.getLangOpts().OpenMPIsDevice) 10131 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10132 } 10133 10134 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10135 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10136 return true; 10137 10138 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10139 // Do not to emit function if it is marked as declare target as it was already 10140 // emitted. 
10141 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10142 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10143 if (auto *F = dyn_cast_or_null<llvm::Function>( 10144 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10145 return !F->isDeclaration(); 10146 return false; 10147 } 10148 return true; 10149 } 10150 10151 return !AlreadyEmittedTargetDecls.insert(D).second; 10152 } 10153 10154 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10155 // If we don't have entries or if we are emitting code for the device, we 10156 // don't need to do anything. 10157 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10158 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10159 (OffloadEntriesInfoManager.empty() && 10160 !HasEmittedDeclareTargetRegion && 10161 !HasEmittedTargetRegion)) 10162 return nullptr; 10163 10164 // Create and register the function that handles the requires directives. 10165 ASTContext &C = CGM.getContext(); 10166 10167 llvm::Function *RequiresRegFn; 10168 { 10169 CodeGenFunction CGF(CGM); 10170 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10171 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10172 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10173 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 10174 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10175 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10176 // TODO: check for other requires clauses. 10177 // The requires directive takes effect only when a target region is 10178 // present in the compilation unit. Otherwise it is ignored and not 10179 // passed to the runtime. This avoids the runtime from throwing an error 10180 // for mismatching requires clauses across compilation units that don't 10181 // contain at least 1 target region. 
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    // The flags are passed to the runtime as a 64-bit integer constant.
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

/// Emit the call to __kmpc_fork_teams that runs \a OutlinedFn for a 'teams'
/// region.
///
/// \param Loc Source location used to build the runtime location argument.
/// \param OutlinedFn Outlined teams function; bit-cast to the kmpc micro-task
///        pointer type.
/// \param CapturedVars Values appended after the fixed arguments; their count
///        is passed as the second argument.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

/// Emit the call to __kmpc_push_num_teams for the 'num_teams' and
/// 'thread_limit' clauses.
///
/// \param NumTeams Optional num_teams expression; a constant 0 is passed when
///        it is null.
/// \param ThreadLimit Optional thread_limit expression; a constant 0 is passed
///        when it is null.
/// Both values are converted to signed 32-bit integers.
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
10319 llvm::Value *DeviceID = nullptr; 10320 if (Device) { 10321 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10322 CGF.Int64Ty, /*isSigned=*/true); 10323 } else { 10324 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10325 } 10326 10327 // Emit the number of elements in the offloading arrays. 10328 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10329 10330 llvm::Value *OffloadingArgs[] = { 10331 DeviceID, PointerNum, BasePointersArrayArg, 10332 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10333 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10334 OffloadingArgs); 10335 }; 10336 10337 // If we need device pointer privatization, we need to emit the body of the 10338 // region with no privatization in the 'else' branch of the conditional. 10339 // Otherwise, we don't have to do anything. 10340 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10341 PrePostActionTy &) { 10342 if (!Info.CaptureDeviceAddrMap.empty()) { 10343 CodeGen.setAction(NoPrivAction); 10344 CodeGen(CGF); 10345 } 10346 }; 10347 10348 // We don't have to do anything to close the region if the if clause evaluates 10349 // to false. 10350 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10351 10352 if (IfCond) { 10353 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10354 } else { 10355 RegionCodeGenTy RCG(BeginThenGen); 10356 RCG(CGF); 10357 } 10358 10359 // If we don't require privatization of device pointers, we emit the body in 10360 // between the runtime calls. This avoids duplicating the body code. 
10361 if (Info.CaptureDeviceAddrMap.empty()) { 10362 CodeGen.setAction(NoPrivAction); 10363 CodeGen(CGF); 10364 } 10365 10366 if (IfCond) { 10367 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10368 } else { 10369 RegionCodeGenTy RCG(EndThenGen); 10370 RCG(CGF); 10371 } 10372 } 10373 10374 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10375 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10376 const Expr *Device) { 10377 if (!CGF.HaveInsertPoint()) 10378 return; 10379 10380 assert((isa<OMPTargetEnterDataDirective>(D) || 10381 isa<OMPTargetExitDataDirective>(D) || 10382 isa<OMPTargetUpdateDirective>(D)) && 10383 "Expecting either target enter, exit data, or update directives."); 10384 10385 CodeGenFunction::OMPTargetDataInfo InputInfo; 10386 llvm::Value *MapTypesArray = nullptr; 10387 // Generate the code for the opening of the data environment. 10388 auto &&ThenGen = [this, &D, Device, &InputInfo, 10389 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10390 // Emit device ID if any. 10391 llvm::Value *DeviceID = nullptr; 10392 if (Device) { 10393 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10394 CGF.Int64Ty, /*isSigned=*/true); 10395 } else { 10396 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10397 } 10398 10399 // Emit the number of elements in the offloading arrays. 10400 llvm::Constant *PointerNum = 10401 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10402 10403 llvm::Value *OffloadingArgs[] = {DeviceID, 10404 PointerNum, 10405 InputInfo.BasePointersArray.getPointer(), 10406 InputInfo.PointersArray.getPointer(), 10407 InputInfo.SizesArray.getPointer(), 10408 MapTypesArray}; 10409 10410 // Select the right runtime function call for each expected standalone 10411 // directive. 10412 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10413 OpenMPRTLFunction RTLFn; 10414 switch (D.getDirectiveKind()) { 10415 case OMPD_target_enter_data: 10416 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_begin_nowait 10417 : OMPRTL__tgt_target_data_begin; 10418 break; 10419 case OMPD_target_exit_data: 10420 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10421 : OMPRTL__tgt_target_data_end; 10422 break; 10423 case OMPD_target_update: 10424 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10425 : OMPRTL__tgt_target_data_update; 10426 break; 10427 case OMPD_parallel: 10428 case OMPD_for: 10429 case OMPD_parallel_for: 10430 case OMPD_parallel_master: 10431 case OMPD_parallel_sections: 10432 case OMPD_for_simd: 10433 case OMPD_parallel_for_simd: 10434 case OMPD_cancel: 10435 case OMPD_cancellation_point: 10436 case OMPD_ordered: 10437 case OMPD_threadprivate: 10438 case OMPD_allocate: 10439 case OMPD_task: 10440 case OMPD_simd: 10441 case OMPD_sections: 10442 case OMPD_section: 10443 case OMPD_single: 10444 case OMPD_master: 10445 case OMPD_critical: 10446 case OMPD_taskyield: 10447 case OMPD_barrier: 10448 case OMPD_taskwait: 10449 case OMPD_taskgroup: 10450 case OMPD_atomic: 10451 case OMPD_flush: 10452 case OMPD_depobj: 10453 case OMPD_teams: 10454 case OMPD_target_data: 10455 case OMPD_distribute: 10456 case OMPD_distribute_simd: 10457 case OMPD_distribute_parallel_for: 10458 case OMPD_distribute_parallel_for_simd: 10459 case OMPD_teams_distribute: 10460 case OMPD_teams_distribute_simd: 10461 case OMPD_teams_distribute_parallel_for: 10462 case OMPD_teams_distribute_parallel_for_simd: 10463 case OMPD_declare_simd: 10464 case OMPD_declare_variant: 10465 case OMPD_declare_target: 10466 case OMPD_end_declare_target: 10467 case OMPD_declare_reduction: 10468 case OMPD_declare_mapper: 10469 case OMPD_taskloop: 10470 case OMPD_taskloop_simd: 10471 case OMPD_master_taskloop: 10472 case OMPD_master_taskloop_simd: 10473 case OMPD_parallel_master_taskloop: 10474 case OMPD_parallel_master_taskloop_simd: 10475 case OMPD_target: 10476 case OMPD_target_simd: 10477 case OMPD_target_teams_distribute: 10478 case 
OMPD_target_teams_distribute_simd: 10479 case OMPD_target_teams_distribute_parallel_for: 10480 case OMPD_target_teams_distribute_parallel_for_simd: 10481 case OMPD_target_teams: 10482 case OMPD_target_parallel: 10483 case OMPD_target_parallel_for: 10484 case OMPD_target_parallel_for_simd: 10485 case OMPD_requires: 10486 case OMPD_unknown: 10487 llvm_unreachable("Unexpected standalone target data directive."); 10488 break; 10489 } 10490 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10491 }; 10492 10493 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10494 CodeGenFunction &CGF, PrePostActionTy &) { 10495 // Fill up the arrays with all the mapped variables. 10496 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10497 MappableExprsHandler::MapValuesArrayTy Pointers; 10498 MappableExprsHandler::MapValuesArrayTy Sizes; 10499 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10500 10501 // Get map clause information. 10502 MappableExprsHandler MEHandler(D, CGF); 10503 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10504 10505 TargetDataInfo Info; 10506 // Fill up the arrays and create the arguments. 
10507 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10508 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10509 Info.PointersArray, Info.SizesArray, 10510 Info.MapTypesArray, Info); 10511 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10512 InputInfo.BasePointersArray = 10513 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10514 InputInfo.PointersArray = 10515 Address(Info.PointersArray, CGM.getPointerAlign()); 10516 InputInfo.SizesArray = 10517 Address(Info.SizesArray, CGM.getPointerAlign()); 10518 MapTypesArray = Info.MapTypesArray; 10519 if (D.hasClausesOfKind<OMPDependClause>()) 10520 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10521 else 10522 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10523 }; 10524 10525 if (IfCond) { 10526 emitIfClause(CGF, IfCond, TargetThenGen, 10527 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10528 } else { 10529 RegionCodeGenTy ThenRCG(TargetThenGen); 10530 ThenRCG(CGF); 10531 } 10532 } 10533 10534 namespace { 10535 /// Kind of parameter in a function with 'declare simd' directive. 10536 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10537 /// Attribute set of the parameter. 10538 struct ParamAttrTy { 10539 ParamKindTy Kind = Vector; 10540 llvm::APSInt StrideOrArg; 10541 llvm::APSInt Alignment; 10542 }; 10543 } // namespace 10544 10545 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10546 ArrayRef<ParamAttrTy> ParamAttrs) { 10547 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10548 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10549 // of that clause. The VLEN value must be power of 2. 10550 // In other case the notion of the function`s "characteristic data type" (CDT) 10551 // is used to compute the vector length. 10552 // CDT is defined in the following order: 10553 // a) For non-void function, the CDT is the return type. 
10554 // b) If the function has any non-uniform, non-linear parameters, then the 10555 // CDT is the type of the first such parameter. 10556 // c) If the CDT determined by a) or b) above is struct, union, or class 10557 // type which is pass-by-value (except for the type that maps to the 10558 // built-in complex data type), the characteristic data type is int. 10559 // d) If none of the above three cases is applicable, the CDT is int. 10560 // The VLEN is then determined based on the CDT and the size of vector 10561 // register of that ISA for which current vector version is generated. The 10562 // VLEN is computed using the formula below: 10563 // VLEN = sizeof(vector_register) / sizeof(CDT), 10564 // where vector register size specified in section 3.2.1 Registers and the 10565 // Stack Frame of original AMD64 ABI document. 10566 QualType RetType = FD->getReturnType(); 10567 if (RetType.isNull()) 10568 return 0; 10569 ASTContext &C = FD->getASTContext(); 10570 QualType CDT; 10571 if (!RetType.isNull() && !RetType->isVoidType()) { 10572 CDT = RetType; 10573 } else { 10574 unsigned Offset = 0; 10575 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10576 if (ParamAttrs[Offset].Kind == Vector) 10577 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10578 ++Offset; 10579 } 10580 if (CDT.isNull()) { 10581 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10582 if (ParamAttrs[I + Offset].Kind == Vector) { 10583 CDT = FD->getParamDecl(I)->getType(); 10584 break; 10585 } 10586 } 10587 } 10588 } 10589 if (CDT.isNull()) 10590 CDT = C.IntTy; 10591 CDT = CDT->getCanonicalTypeUnqualified(); 10592 if (CDT->isRecordType() || CDT->isUnionType()) 10593 CDT = C.IntTy; 10594 return C.getTypeSize(CDT); 10595 } 10596 10597 static void 10598 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10599 const llvm::APSInt &VLENVal, 10600 ArrayRef<ParamAttrTy> ParamAttrs, 10601 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10602 struct ISADataTy { 
10603 char ISA; 10604 unsigned VecRegSize; 10605 }; 10606 ISADataTy ISAData[] = { 10607 { 10608 'b', 128 10609 }, // SSE 10610 { 10611 'c', 256 10612 }, // AVX 10613 { 10614 'd', 256 10615 }, // AVX2 10616 { 10617 'e', 512 10618 }, // AVX512 10619 }; 10620 llvm::SmallVector<char, 2> Masked; 10621 switch (State) { 10622 case OMPDeclareSimdDeclAttr::BS_Undefined: 10623 Masked.push_back('N'); 10624 Masked.push_back('M'); 10625 break; 10626 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10627 Masked.push_back('N'); 10628 break; 10629 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10630 Masked.push_back('M'); 10631 break; 10632 } 10633 for (char Mask : Masked) { 10634 for (const ISADataTy &Data : ISAData) { 10635 SmallString<256> Buffer; 10636 llvm::raw_svector_ostream Out(Buffer); 10637 Out << "_ZGV" << Data.ISA << Mask; 10638 if (!VLENVal) { 10639 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10640 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10641 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10642 } else { 10643 Out << VLENVal; 10644 } 10645 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10646 switch (ParamAttr.Kind){ 10647 case LinearWithVarStride: 10648 Out << 's' << ParamAttr.StrideOrArg; 10649 break; 10650 case Linear: 10651 Out << 'l'; 10652 if (!!ParamAttr.StrideOrArg) 10653 Out << ParamAttr.StrideOrArg; 10654 break; 10655 case Uniform: 10656 Out << 'u'; 10657 break; 10658 case Vector: 10659 Out << 'v'; 10660 break; 10661 } 10662 if (!!ParamAttr.Alignment) 10663 Out << 'a' << ParamAttr.Alignment; 10664 } 10665 Out << '_' << Fn->getName(); 10666 Fn->addFnAttr(Out.str()); 10667 } 10668 } 10669 } 10670 10671 // This are the Functions that are needed to mangle the name of the 10672 // vector functions generated by the compiler, according to the rules 10673 // defined in the "Vector Function ABI specifications for AArch64", 10674 // available at 10675 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10676 10677 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10678 /// 10679 /// TODO: Need to implement the behavior for reference marked with a 10680 /// var or no linear modifiers (1.b in the section). For this, we 10681 /// need to extend ParamKindTy to support the linear modifiers. 10682 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10683 QT = QT.getCanonicalType(); 10684 10685 if (QT->isVoidType()) 10686 return false; 10687 10688 if (Kind == ParamKindTy::Uniform) 10689 return false; 10690 10691 if (Kind == ParamKindTy::Linear) 10692 return false; 10693 10694 // TODO: Handle linear references with modifiers 10695 10696 if (Kind == ParamKindTy::LinearWithVarStride) 10697 return false; 10698 10699 return true; 10700 } 10701 10702 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10703 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10704 QT = QT.getCanonicalType(); 10705 unsigned Size = C.getTypeSize(QT); 10706 10707 // Only scalars and complex within 16 bytes wide set PVB to true. 10708 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10709 return false; 10710 10711 if (QT->isFloatingType()) 10712 return true; 10713 10714 if (QT->isIntegerType()) 10715 return true; 10716 10717 if (QT->isPointerType()) 10718 return true; 10719 10720 // TODO: Add support for complex types (section 3.1.2, item 2). 10721 10722 return false; 10723 } 10724 10725 /// Computes the lane size (LS) of a return type or of an input parameter, 10726 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10727 /// TODO: Add support for references, section 3.2.1, item 1. 
10728 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10729 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10730 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10731 if (getAArch64PBV(PTy, C)) 10732 return C.getTypeSize(PTy); 10733 } 10734 if (getAArch64PBV(QT, C)) 10735 return C.getTypeSize(QT); 10736 10737 return C.getTypeSize(C.getUIntPtrType()); 10738 } 10739 10740 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10741 // signature of the scalar function, as defined in 3.2.2 of the 10742 // AAVFABI. 10743 static std::tuple<unsigned, unsigned, bool> 10744 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10745 QualType RetType = FD->getReturnType().getCanonicalType(); 10746 10747 ASTContext &C = FD->getASTContext(); 10748 10749 bool OutputBecomesInput = false; 10750 10751 llvm::SmallVector<unsigned, 8> Sizes; 10752 if (!RetType->isVoidType()) { 10753 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10754 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10755 OutputBecomesInput = true; 10756 } 10757 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10758 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10759 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10760 } 10761 10762 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10763 // The LS of a function parameter / return value can only be a power 10764 // of 2, starting from 8 bits, up to 128. 
10765 assert(std::all_of(Sizes.begin(), Sizes.end(), 10766 [](unsigned Size) { 10767 return Size == 8 || Size == 16 || Size == 32 || 10768 Size == 64 || Size == 128; 10769 }) && 10770 "Invalid size"); 10771 10772 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10773 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10774 OutputBecomesInput); 10775 } 10776 10777 /// Mangle the parameter part of the vector function name according to 10778 /// their OpenMP classification. The mangling function is defined in 10779 /// section 3.5 of the AAVFABI. 10780 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10781 SmallString<256> Buffer; 10782 llvm::raw_svector_ostream Out(Buffer); 10783 for (const auto &ParamAttr : ParamAttrs) { 10784 switch (ParamAttr.Kind) { 10785 case LinearWithVarStride: 10786 Out << "ls" << ParamAttr.StrideOrArg; 10787 break; 10788 case Linear: 10789 Out << 'l'; 10790 // Don't print the step value if it is not present or if it is 10791 // equal to 1. 10792 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10793 Out << ParamAttr.StrideOrArg; 10794 break; 10795 case Uniform: 10796 Out << 'u'; 10797 break; 10798 case Vector: 10799 Out << 'v'; 10800 break; 10801 } 10802 10803 if (!!ParamAttr.Alignment) 10804 Out << 'a' << ParamAttr.Alignment; 10805 } 10806 10807 return std::string(Out.str()); 10808 } 10809 10810 // Function used to add the attribute. The parameter `VLEN` is 10811 // templated to allow the use of "x" when targeting scalable functions 10812 // for SVE. 
10813 template <typename T> 10814 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10815 char ISA, StringRef ParSeq, 10816 StringRef MangledName, bool OutputBecomesInput, 10817 llvm::Function *Fn) { 10818 SmallString<256> Buffer; 10819 llvm::raw_svector_ostream Out(Buffer); 10820 Out << Prefix << ISA << LMask << VLEN; 10821 if (OutputBecomesInput) 10822 Out << "v"; 10823 Out << ParSeq << "_" << MangledName; 10824 Fn->addFnAttr(Out.str()); 10825 } 10826 10827 // Helper function to generate the Advanced SIMD names depending on 10828 // the value of the NDS when simdlen is not present. 10829 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10830 StringRef Prefix, char ISA, 10831 StringRef ParSeq, StringRef MangledName, 10832 bool OutputBecomesInput, 10833 llvm::Function *Fn) { 10834 switch (NDS) { 10835 case 8: 10836 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10837 OutputBecomesInput, Fn); 10838 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10839 OutputBecomesInput, Fn); 10840 break; 10841 case 16: 10842 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10843 OutputBecomesInput, Fn); 10844 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10845 OutputBecomesInput, Fn); 10846 break; 10847 case 32: 10848 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10849 OutputBecomesInput, Fn); 10850 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10851 OutputBecomesInput, Fn); 10852 break; 10853 case 64: 10854 case 128: 10855 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10856 OutputBecomesInput, Fn); 10857 break; 10858 default: 10859 llvm_unreachable("Scalar type is too wide."); 10860 } 10861 } 10862 10863 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10864 static void emitAArch64DeclareSimdFunction( 10865 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10866 ArrayRef<ParamAttrTy> ParamAttrs, 10867 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10868 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10869 10870 // Get basic data for building the vector signature. 10871 const auto Data = getNDSWDS(FD, ParamAttrs); 10872 const unsigned NDS = std::get<0>(Data); 10873 const unsigned WDS = std::get<1>(Data); 10874 const bool OutputBecomesInput = std::get<2>(Data); 10875 10876 // Check the values provided via `simdlen` by the user. 10877 // 1. A `simdlen(1)` doesn't produce vector signatures, 10878 if (UserVLEN == 1) { 10879 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10880 DiagnosticsEngine::Warning, 10881 "The clause simdlen(1) has no effect when targeting aarch64."); 10882 CGM.getDiags().Report(SLoc, DiagID); 10883 return; 10884 } 10885 10886 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10887 // Advanced SIMD output. 10888 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10889 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10890 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10891 "power of 2 when targeting Advanced SIMD."); 10892 CGM.getDiags().Report(SLoc, DiagID); 10893 return; 10894 } 10895 10896 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10897 // limits. 10898 if (ISA == 's' && UserVLEN != 0) { 10899 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10900 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10901 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10902 "lanes in the architectural constraints " 10903 "for SVE (min is 128-bit, max is " 10904 "2048-bit, by steps of 128-bit)"); 10905 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10906 return; 10907 } 10908 } 10909 10910 // Sort out parameter sequence. 
10911 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10912 StringRef Prefix = "_ZGV"; 10913 // Generate simdlen from user input (if any). 10914 if (UserVLEN) { 10915 if (ISA == 's') { 10916 // SVE generates only a masked function. 10917 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10918 OutputBecomesInput, Fn); 10919 } else { 10920 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10921 // Advanced SIMD generates one or two functions, depending on 10922 // the `[not]inbranch` clause. 10923 switch (State) { 10924 case OMPDeclareSimdDeclAttr::BS_Undefined: 10925 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10926 OutputBecomesInput, Fn); 10927 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10928 OutputBecomesInput, Fn); 10929 break; 10930 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10931 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10932 OutputBecomesInput, Fn); 10933 break; 10934 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10935 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10936 OutputBecomesInput, Fn); 10937 break; 10938 } 10939 } 10940 } else { 10941 // If no user simdlen is provided, follow the AAVFABI rules for 10942 // generating the vector length. 10943 if (ISA == 's') { 10944 // SVE, section 3.4.1, item 1. 10945 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10946 OutputBecomesInput, Fn); 10947 } else { 10948 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10949 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10950 // two vector names depending on the use of the clause 10951 // `[not]inbranch`. 
10952 switch (State) { 10953 case OMPDeclareSimdDeclAttr::BS_Undefined: 10954 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10955 OutputBecomesInput, Fn); 10956 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10957 OutputBecomesInput, Fn); 10958 break; 10959 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10960 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10961 OutputBecomesInput, Fn); 10962 break; 10963 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10964 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10965 OutputBecomesInput, Fn); 10966 break; 10967 } 10968 } 10969 } 10970 } 10971 10972 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10973 llvm::Function *Fn) { 10974 ASTContext &C = CGM.getContext(); 10975 FD = FD->getMostRecentDecl(); 10976 // Map params to their positions in function decl. 10977 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10978 if (isa<CXXMethodDecl>(FD)) 10979 ParamPositions.try_emplace(FD, 0); 10980 unsigned ParamPos = ParamPositions.size(); 10981 for (const ParmVarDecl *P : FD->parameters()) { 10982 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10983 ++ParamPos; 10984 } 10985 while (FD) { 10986 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10987 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10988 // Mark uniform parameters. 10989 for (const Expr *E : Attr->uniforms()) { 10990 E = E->IgnoreParenImpCasts(); 10991 unsigned Pos; 10992 if (isa<CXXThisExpr>(E)) { 10993 Pos = ParamPositions[FD]; 10994 } else { 10995 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10996 ->getCanonicalDecl(); 10997 Pos = ParamPositions[PVD]; 10998 } 10999 ParamAttrs[Pos].Kind = Uniform; 11000 } 11001 // Get alignment info. 
11002 auto NI = Attr->alignments_begin(); 11003 for (const Expr *E : Attr->aligneds()) { 11004 E = E->IgnoreParenImpCasts(); 11005 unsigned Pos; 11006 QualType ParmTy; 11007 if (isa<CXXThisExpr>(E)) { 11008 Pos = ParamPositions[FD]; 11009 ParmTy = E->getType(); 11010 } else { 11011 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11012 ->getCanonicalDecl(); 11013 Pos = ParamPositions[PVD]; 11014 ParmTy = PVD->getType(); 11015 } 11016 ParamAttrs[Pos].Alignment = 11017 (*NI) 11018 ? (*NI)->EvaluateKnownConstInt(C) 11019 : llvm::APSInt::getUnsigned( 11020 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11021 .getQuantity()); 11022 ++NI; 11023 } 11024 // Mark linear parameters. 11025 auto SI = Attr->steps_begin(); 11026 auto MI = Attr->modifiers_begin(); 11027 for (const Expr *E : Attr->linears()) { 11028 E = E->IgnoreParenImpCasts(); 11029 unsigned Pos; 11030 if (isa<CXXThisExpr>(E)) { 11031 Pos = ParamPositions[FD]; 11032 } else { 11033 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11034 ->getCanonicalDecl(); 11035 Pos = ParamPositions[PVD]; 11036 } 11037 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11038 ParamAttr.Kind = Linear; 11039 if (*SI) { 11040 Expr::EvalResult Result; 11041 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11042 if (const auto *DRE = 11043 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11044 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11045 ParamAttr.Kind = LinearWithVarStride; 11046 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11047 ParamPositions[StridePVD->getCanonicalDecl()]); 11048 } 11049 } 11050 } else { 11051 ParamAttr.StrideOrArg = Result.Val.getInt(); 11052 } 11053 } 11054 ++SI; 11055 ++MI; 11056 } 11057 llvm::APSInt VLENVal; 11058 SourceLocation ExprLoc; 11059 const Expr *VLENExpr = Attr->getSimdlen(); 11060 if (VLENExpr) { 11061 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11062 ExprLoc = VLENExpr->getExprLoc(); 11063 } 11064 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11065 if (CGM.getTriple().isX86()) { 11066 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11067 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11068 unsigned VLEN = VLENVal.getExtValue(); 11069 StringRef MangledName = Fn->getName(); 11070 if (CGM.getTarget().hasFeature("sve")) 11071 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11072 MangledName, 's', 128, Fn, ExprLoc); 11073 if (CGM.getTarget().hasFeature("neon")) 11074 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11075 MangledName, 'n', 128, Fn, ExprLoc); 11076 } 11077 } 11078 FD = FD->getPreviousDecl(); 11079 } 11080 } 11081 11082 namespace { 11083 /// Cleanup action for doacross support. 11084 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 11085 public: 11086 static const int DoacrossFinArgs = 2; 11087 11088 private: 11089 llvm::FunctionCallee RTLFn; 11090 llvm::Value *Args[DoacrossFinArgs]; 11091 11092 public: 11093 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 11094 ArrayRef<llvm::Value *> CallArgs) 11095 : RTLFn(RTLFn) { 11096 assert(CallArgs.size() == DoacrossFinArgs); 11097 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11098 } 11099 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11100 if (!CGF.HaveInsertPoint()) 11101 return; 11102 CGF.EmitRuntimeCall(RTLFn, Args); 11103 } 11104 }; 11105 } // namespace 11106 11107 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11108 const OMPLoopDirective &D, 11109 ArrayRef<Expr *> NumIterations) { 11110 if (!CGF.HaveInsertPoint()) 11111 return; 11112 11113 ASTContext &C = CGM.getContext(); 11114 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 11115 RecordDecl *RD; 11116 if (KmpDimTy.isNull()) { 11117 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 11118 // kmp_int64 lo; // lower 11119 // kmp_int64 up; // upper 11120 // 
kmp_int64 st; // stride 11121 // }; 11122 RD = C.buildImplicitRecord("kmp_dim"); 11123 RD->startDefinition(); 11124 addFieldToRecordDecl(C, RD, Int64Ty); 11125 addFieldToRecordDecl(C, RD, Int64Ty); 11126 addFieldToRecordDecl(C, RD, Int64Ty); 11127 RD->completeDefinition(); 11128 KmpDimTy = C.getRecordType(RD); 11129 } else { 11130 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 11131 } 11132 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 11133 QualType ArrayTy = 11134 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 11135 11136 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 11137 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 11138 enum { LowerFD = 0, UpperFD, StrideFD }; 11139 // Fill dims with data. 11140 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 11141 LValue DimsLVal = CGF.MakeAddrLValue( 11142 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 11143 // dims.upper = num_iterations; 11144 LValue UpperLVal = CGF.EmitLValueForField( 11145 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 11146 llvm::Value *NumIterVal = 11147 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 11148 D.getNumIterations()->getType(), Int64Ty, 11149 D.getNumIterations()->getExprLoc()); 11150 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 11151 // dims.stride = 1; 11152 LValue StrideLVal = CGF.EmitLValueForField( 11153 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 11154 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 11155 StrideLVal); 11156 } 11157 11158 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 11159 // kmp_int32 num_dims, struct kmp_dim * dims); 11160 llvm::Value *Args[] = { 11161 emitUpdateLocation(CGF, D.getBeginLoc()), 11162 getThreadID(CGF, D.getBeginLoc()), 11163 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 11164 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11165 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 
11166 CGM.VoidPtrTy)}; 11167 11168 llvm::FunctionCallee RTLFn = 11169 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 11170 CGF.EmitRuntimeCall(RTLFn, Args); 11171 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 11172 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 11173 llvm::FunctionCallee FiniRTLFn = 11174 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 11175 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11176 llvm::makeArrayRef(FiniArgs)); 11177 } 11178 11179 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11180 const OMPDependClause *C) { 11181 QualType Int64Ty = 11182 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 11183 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 11184 QualType ArrayTy = CGM.getContext().getConstantArrayType( 11185 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 11186 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 11187 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 11188 const Expr *CounterVal = C->getLoopData(I); 11189 assert(CounterVal); 11190 llvm::Value *CntVal = CGF.EmitScalarConversion( 11191 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 11192 CounterVal->getExprLoc()); 11193 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 11194 /*Volatile=*/false, Int64Ty); 11195 } 11196 llvm::Value *Args[] = { 11197 emitUpdateLocation(CGF, C->getBeginLoc()), 11198 getThreadID(CGF, C->getBeginLoc()), 11199 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 11200 llvm::FunctionCallee RTLFn; 11201 if (C->getDependencyKind() == OMPC_DEPEND_source) { 11202 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 11203 } else { 11204 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 11205 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 11206 } 11207 CGF.EmitRuntimeCall(RTLFn, Args); 11208 } 11209 11210 void CGOpenMPRuntime::emitCall(CodeGenFunction 
&CGF, SourceLocation Loc, 11211 llvm::FunctionCallee Callee, 11212 ArrayRef<llvm::Value *> Args) const { 11213 assert(Loc.isValid() && "Outlined function call location must be valid."); 11214 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 11215 11216 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 11217 if (Fn->doesNotThrow()) { 11218 CGF.EmitNounwindRuntimeCall(Fn, Args); 11219 return; 11220 } 11221 } 11222 CGF.EmitRuntimeCall(Callee, Args); 11223 } 11224 11225 void CGOpenMPRuntime::emitOutlinedFunctionCall( 11226 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 11227 ArrayRef<llvm::Value *> Args) const { 11228 emitCall(CGF, Loc, OutlinedFn, Args); 11229 } 11230 11231 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11232 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11233 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11234 HasEmittedDeclareTargetRegion = true; 11235 } 11236 11237 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11238 const VarDecl *NativeParam, 11239 const VarDecl *TargetParam) const { 11240 return CGF.GetAddrOfLocalVar(NativeParam); 11241 } 11242 11243 namespace { 11244 /// Cleanup action for allocate support. 
11245 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11246 public: 11247 static const int CleanupArgs = 3; 11248 11249 private: 11250 llvm::FunctionCallee RTLFn; 11251 llvm::Value *Args[CleanupArgs]; 11252 11253 public: 11254 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11255 ArrayRef<llvm::Value *> CallArgs) 11256 : RTLFn(RTLFn) { 11257 assert(CallArgs.size() == CleanupArgs && 11258 "Size of arguments does not match."); 11259 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11260 } 11261 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11262 if (!CGF.HaveInsertPoint()) 11263 return; 11264 CGF.EmitRuntimeCall(RTLFn, Args); 11265 } 11266 }; 11267 } // namespace 11268 11269 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11270 const VarDecl *VD) { 11271 if (!VD) 11272 return Address::invalid(); 11273 const VarDecl *CVD = VD->getCanonicalDecl(); 11274 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11275 return Address::invalid(); 11276 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11277 // Use the default allocation. 
11278 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 11279 !AA->getAllocator()) 11280 return Address::invalid(); 11281 llvm::Value *Size; 11282 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11283 if (CVD->getType()->isVariablyModifiedType()) { 11284 Size = CGF.getTypeSize(CVD->getType()); 11285 // Align the size: ((size + align - 1) / align) * align 11286 Size = CGF.Builder.CreateNUWAdd( 11287 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11288 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11289 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11290 } else { 11291 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11292 Size = CGM.getSize(Sz.alignTo(Align)); 11293 } 11294 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11295 assert(AA->getAllocator() && 11296 "Expected allocator expression for non-default allocator."); 11297 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11298 // According to the standard, the original allocator type is a enum (integer). 11299 // Convert to pointer type, if required. 
11300 if (Allocator->getType()->isIntegerTy()) 11301 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11302 else if (Allocator->getType()->isPointerTy()) 11303 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11304 CGM.VoidPtrTy); 11305 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11306 11307 llvm::Value *Addr = 11308 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11309 getName({CVD->getName(), ".void.addr"})); 11310 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11311 Allocator}; 11312 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11313 11314 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11315 llvm::makeArrayRef(FiniArgs)); 11316 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11317 Addr, 11318 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11319 getName({CVD->getName(), ".addr"})); 11320 return Address(Addr, Align); 11321 } 11322 11323 /// Finds the variant function that matches current context with its context 11324 /// selector. 11325 static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM, 11326 const FunctionDecl *FD) { 11327 if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) 11328 return FD; 11329 11330 SmallVector<Expr *, 8> VariantExprs; 11331 SmallVector<VariantMatchInfo, 8> VMIs; 11332 for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { 11333 const OMPTraitInfo &TI = A->getTraitInfos(); 11334 VMIs.push_back(VariantMatchInfo()); 11335 TI.getAsVariantMatchInfo(CGM.getContext(), VMIs.back()); 11336 VariantExprs.push_back(A->getVariantFuncRef()); 11337 } 11338 11339 OMPContext Ctx(CGM.getLangOpts().OpenMPIsDevice, CGM.getTriple()); 11340 // FIXME: Keep the context in the OMPIRBuilder so we can add constructs as we 11341 // build them. 
11342 11343 int BestMatchIdx = getBestVariantMatchForContext(VMIs, Ctx); 11344 if (BestMatchIdx < 0) 11345 return FD; 11346 11347 return cast<FunctionDecl>( 11348 cast<DeclRefExpr>(VariantExprs[BestMatchIdx]->IgnoreParenImpCasts()) 11349 ->getDecl()); 11350 } 11351 11352 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { 11353 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11354 // If the original function is defined already, use its definition. 11355 StringRef MangledName = CGM.getMangledName(GD); 11356 llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); 11357 if (Orig && !Orig->isDeclaration()) 11358 return false; 11359 const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D); 11360 // Emit original function if it does not have declare variant attribute or the 11361 // context does not match. 11362 if (NewFD == D) 11363 return false; 11364 GlobalDecl NewGD = GD.getWithDecl(NewFD); 11365 if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { 11366 DeferredVariantFunction.erase(D); 11367 return true; 11368 } 11369 DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); 11370 return true; 11371 } 11372 11373 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11374 CodeGenModule &CGM, const OMPLoopDirective &S) 11375 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11376 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11377 if (!NeedToPush) 11378 return; 11379 NontemporalDeclsSet &DS = 11380 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11381 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11382 for (const Stmt *Ref : C->private_refs()) { 11383 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11384 const ValueDecl *VD; 11385 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11386 VD = DRE->getDecl(); 11387 } else { 11388 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11389 
assert((ME->isImplicitCXXThis() || 11390 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11391 "Expected member of current class."); 11392 VD = ME->getMemberDecl(); 11393 } 11394 DS.insert(VD); 11395 } 11396 } 11397 } 11398 11399 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11400 if (!NeedToPush) 11401 return; 11402 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11403 } 11404 11405 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11406 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11407 11408 return llvm::any_of( 11409 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11410 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11411 } 11412 11413 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11414 const OMPExecutableDirective &S, 11415 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11416 const { 11417 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11418 // Vars in target/task regions must be excluded completely. 11419 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11420 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11421 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11422 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11423 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11424 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11425 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11426 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11427 } 11428 } 11429 // Exclude vars in private clauses. 
11430 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11431 for (const Expr *Ref : C->varlists()) { 11432 if (!Ref->getType()->isScalarType()) 11433 continue; 11434 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11435 if (!DRE) 11436 continue; 11437 NeedToCheckForLPCs.insert(DRE->getDecl()); 11438 } 11439 } 11440 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11441 for (const Expr *Ref : C->varlists()) { 11442 if (!Ref->getType()->isScalarType()) 11443 continue; 11444 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11445 if (!DRE) 11446 continue; 11447 NeedToCheckForLPCs.insert(DRE->getDecl()); 11448 } 11449 } 11450 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11451 for (const Expr *Ref : C->varlists()) { 11452 if (!Ref->getType()->isScalarType()) 11453 continue; 11454 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11455 if (!DRE) 11456 continue; 11457 NeedToCheckForLPCs.insert(DRE->getDecl()); 11458 } 11459 } 11460 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11461 for (const Expr *Ref : C->varlists()) { 11462 if (!Ref->getType()->isScalarType()) 11463 continue; 11464 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11465 if (!DRE) 11466 continue; 11467 NeedToCheckForLPCs.insert(DRE->getDecl()); 11468 } 11469 } 11470 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11471 for (const Expr *Ref : C->varlists()) { 11472 if (!Ref->getType()->isScalarType()) 11473 continue; 11474 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11475 if (!DRE) 11476 continue; 11477 NeedToCheckForLPCs.insert(DRE->getDecl()); 11478 } 11479 } 11480 for (const Decl *VD : NeedToCheckForLPCs) { 11481 for (const LastprivateConditionalData &Data : 11482 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11483 if (Data.DeclToUniqueName.count(VD) > 0) { 11484 if (!Data.Disabled) 11485 
NeedToAddForLPCsAsDisabled.insert(VD); 11486 break; 11487 } 11488 } 11489 } 11490 } 11491 11492 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11493 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11494 : CGM(CGF.CGM), 11495 Action((CGM.getLangOpts().OpenMP >= 50 && 11496 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11497 [](const OMPLastprivateClause *C) { 11498 return C->getKind() == 11499 OMPC_LASTPRIVATE_conditional; 11500 })) 11501 ? ActionToDo::PushAsLastprivateConditional 11502 : ActionToDo::DoNotPush) { 11503 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11504 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11505 return; 11506 assert(Action == ActionToDo::PushAsLastprivateConditional && 11507 "Expected a push action."); 11508 LastprivateConditionalData &Data = 11509 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11510 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11511 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11512 continue; 11513 11514 for (const Expr *Ref : C->varlists()) { 11515 Data.DeclToUniqueName.insert(std::make_pair( 11516 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11517 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11518 } 11519 } 11520 Data.IVLVal = IVLVal; 11521 Data.Fn = CGF.CurFn; 11522 } 11523 11524 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11525 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11526 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11527 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11528 if (CGM.getLangOpts().OpenMP < 50) 11529 return; 11530 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11531 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11532 if (!NeedToAddForLPCsAsDisabled.empty()) { 11533 Action = ActionToDo::DisableLastprivateConditional; 11534 
LastprivateConditionalData &Data = 11535 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11536 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11537 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11538 Data.Fn = CGF.CurFn; 11539 Data.Disabled = true; 11540 } 11541 } 11542 11543 CGOpenMPRuntime::LastprivateConditionalRAII 11544 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11545 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11546 return LastprivateConditionalRAII(CGF, S); 11547 } 11548 11549 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11550 if (CGM.getLangOpts().OpenMP < 50) 11551 return; 11552 if (Action == ActionToDo::DisableLastprivateConditional) { 11553 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11554 "Expected list of disabled private vars."); 11555 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11556 } 11557 if (Action == ActionToDo::PushAsLastprivateConditional) { 11558 assert( 11559 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11560 "Expected list of lastprivate conditional vars."); 11561 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11562 } 11563 } 11564 11565 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11566 const VarDecl *VD) { 11567 ASTContext &C = CGM.getContext(); 11568 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11569 if (I == LastprivateConditionalToTypes.end()) 11570 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11571 QualType NewType; 11572 const FieldDecl *VDField; 11573 const FieldDecl *FiredField; 11574 LValue BaseLVal; 11575 auto VI = I->getSecond().find(VD); 11576 if (VI == I->getSecond().end()) { 11577 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 11578 RD->startDefinition(); 11579 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11580 FiredField = 
addFieldToRecordDecl(C, RD, C.CharTy); 11581 RD->completeDefinition(); 11582 NewType = C.getRecordType(RD); 11583 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11584 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11585 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11586 } else { 11587 NewType = std::get<0>(VI->getSecond()); 11588 VDField = std::get<1>(VI->getSecond()); 11589 FiredField = std::get<2>(VI->getSecond()); 11590 BaseLVal = std::get<3>(VI->getSecond()); 11591 } 11592 LValue FiredLVal = 11593 CGF.EmitLValueForField(BaseLVal, FiredField); 11594 CGF.EmitStoreOfScalar( 11595 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11596 FiredLVal); 11597 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11598 } 11599 11600 namespace { 11601 /// Checks if the lastprivate conditional variable is referenced in LHS. 11602 class LastprivateConditionalRefChecker final 11603 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11604 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11605 const Expr *FoundE = nullptr; 11606 const Decl *FoundD = nullptr; 11607 StringRef UniqueDeclName; 11608 LValue IVLVal; 11609 llvm::Function *FoundFn = nullptr; 11610 SourceLocation Loc; 11611 11612 public: 11613 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11614 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11615 llvm::reverse(LPM)) { 11616 auto It = D.DeclToUniqueName.find(E->getDecl()); 11617 if (It == D.DeclToUniqueName.end()) 11618 continue; 11619 if (D.Disabled) 11620 return false; 11621 FoundE = E; 11622 FoundD = E->getDecl()->getCanonicalDecl(); 11623 UniqueDeclName = It->second; 11624 IVLVal = D.IVLVal; 11625 FoundFn = D.Fn; 11626 break; 11627 } 11628 return FoundE == E; 11629 } 11630 bool VisitMemberExpr(const MemberExpr *E) { 11631 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11632 return false; 11633 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 11634 llvm::reverse(LPM)) { 11635 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11636 if (It == D.DeclToUniqueName.end()) 11637 continue; 11638 if (D.Disabled) 11639 return false; 11640 FoundE = E; 11641 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11642 UniqueDeclName = It->second; 11643 IVLVal = D.IVLVal; 11644 FoundFn = D.Fn; 11645 break; 11646 } 11647 return FoundE == E; 11648 } 11649 bool VisitStmt(const Stmt *S) { 11650 for (const Stmt *Child : S->children()) { 11651 if (!Child) 11652 continue; 11653 if (const auto *E = dyn_cast<Expr>(Child)) 11654 if (!E->isGLValue()) 11655 continue; 11656 if (Visit(Child)) 11657 return true; 11658 } 11659 return false; 11660 } 11661 explicit LastprivateConditionalRefChecker( 11662 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11663 : LPM(LPM) {} 11664 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11665 getFoundData() const { 11666 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11667 } 11668 }; 11669 } // namespace 11670 11671 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11672 LValue IVLVal, 11673 StringRef UniqueDeclName, 11674 LValue LVal, 11675 SourceLocation Loc) { 11676 // Last updated loop counter for the lastprivate conditional var. 11677 // int<xx> last_iv = 0; 11678 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11679 llvm::Constant *LastIV = 11680 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 11681 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11682 IVLVal.getAlignment().getAsAlign()); 11683 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11684 11685 // Last value of the lastprivate conditional. 
// decltype(priv_a) last_a;
  // The internal variable is named UniqueDeclName and takes the type and
  // alignment of the private copy.
  llvm::Constant *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  // Loaded once here, before the region body below is emitted.
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    // The comparison is signed or unsigned to match the type of the
    // iteration variable.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    // Only scalar and complex values can be copied; aggregates are rejected.
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    // NOTE(review): the object returned by CreateEmpty is a temporary that is
    // discarded at the end of this statement, after the branch above was
    // already emitted. If it is a scoped guard (it is bound to a named local
    // elsewhere in this file), it covers neither the branch nor the EmitBlock
    // below - confirm the intent.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    // The critical region is named after the variable's unique name.
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

/// Checks whether \p LHS references a lastprivate conditional variable listed
/// on LastprivateConditionalStack and, if so, emits the code that records the
/// update. Does nothing for OpenMP < 50 or when the stack is empty.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  // The variable was registered in a function other than the one being
  // emitted now.
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
11774 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 11775 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 11776 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 11777 "Lastprivate conditional is not found in outer region."); 11778 QualType StructTy = std::get<0>(It->getSecond()); 11779 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 11780 LValue PrivLVal = CGF.EmitLValue(FoundE); 11781 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11782 PrivLVal.getAddress(CGF), 11783 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 11784 LValue BaseLVal = 11785 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 11786 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 11787 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 11788 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 11789 FiredLVal, llvm::AtomicOrdering::Unordered, 11790 /*IsVolatile=*/true, /*isInit=*/false); 11791 return; 11792 } 11793 11794 // Private address of the lastprivate conditional in the current context. 
11795 // priv_a 11796 LValue LVal = CGF.EmitLValue(FoundE); 11797 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 11798 FoundE->getExprLoc()); 11799 } 11800 11801 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 11802 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11803 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 11804 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11805 return; 11806 auto Range = llvm::reverse(LastprivateConditionalStack); 11807 auto It = llvm::find_if( 11808 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 11809 if (It == Range.end() || It->Fn != CGF.CurFn) 11810 return; 11811 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 11812 assert(LPCI != LastprivateConditionalToTypes.end() && 11813 "Lastprivates must be registered already."); 11814 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11815 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 11816 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 11817 for (const auto &Pair : It->DeclToUniqueName) { 11818 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 11819 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 11820 continue; 11821 auto I = LPCI->getSecond().find(Pair.first); 11822 assert(I != LPCI->getSecond().end() && 11823 "Lastprivate must be rehistered already."); 11824 // bool Cmp = priv_a.Fired != 0; 11825 LValue BaseLVal = std::get<3>(I->getSecond()); 11826 LValue FiredLVal = 11827 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 11828 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 11829 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 11830 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 11831 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 11832 // if (Cmp) { 11833 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 11834 CGF.EmitBlock(ThenBB); 
11835 Address Addr = CGF.GetAddrOfLocalVar(VD); 11836 LValue LVal; 11837 if (VD->getType()->isReferenceType()) 11838 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 11839 AlignmentSource::Decl); 11840 else 11841 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 11842 AlignmentSource::Decl); 11843 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 11844 D.getBeginLoc()); 11845 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 11846 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 11847 // } 11848 } 11849 } 11850 11851 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11852 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11853 SourceLocation Loc) { 11854 if (CGF.getLangOpts().OpenMP < 50) 11855 return; 11856 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 11857 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 11858 "Unknown lastprivate conditional variable."); 11859 StringRef UniqueName = It->second; 11860 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11861 // The variable was not updated in the region - exit. 
11862 if (!GV) 11863 return; 11864 LValue LPLVal = CGF.MakeAddrLValue( 11865 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 11866 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 11867 CGF.EmitStoreOfScalar(Res, PrivLVal); 11868 } 11869 11870 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11871 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11872 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11873 llvm_unreachable("Not supported in SIMD-only mode"); 11874 } 11875 11876 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11877 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11878 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11879 llvm_unreachable("Not supported in SIMD-only mode"); 11880 } 11881 11882 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11883 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11884 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11885 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11886 bool Tied, unsigned &NumberOfParts) { 11887 llvm_unreachable("Not supported in SIMD-only mode"); 11888 } 11889 11890 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11891 SourceLocation Loc, 11892 llvm::Function *OutlinedFn, 11893 ArrayRef<llvm::Value *> CapturedVars, 11894 const Expr *IfCond) { 11895 llvm_unreachable("Not supported in SIMD-only mode"); 11896 } 11897 11898 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11899 CodeGenFunction &CGF, StringRef CriticalName, 11900 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11901 const Expr *Hint) { 11902 llvm_unreachable("Not supported in SIMD-only mode"); 11903 } 11904 11905 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11906 const RegionCodeGenTy &MasterOpGen, 11907 SourceLocation Loc) { 11908 llvm_unreachable("Not supported in SIMD-only mode"); 11909 } 11910 11911 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11912 SourceLocation Loc) { 11913 llvm_unreachable("Not supported in SIMD-only mode"); 11914 } 11915 11916 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11917 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11918 SourceLocation Loc) { 11919 llvm_unreachable("Not supported in SIMD-only mode"); 11920 } 11921 11922 void CGOpenMPSIMDRuntime::emitSingleRegion( 11923 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11924 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11925 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11926 ArrayRef<const Expr *> AssignmentOps) { 11927 llvm_unreachable("Not supported in SIMD-only mode"); 11928 } 11929 11930 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11931 const RegionCodeGenTy &OrderedOpGen, 11932 SourceLocation Loc, 11933 bool IsThreads) { 11934 llvm_unreachable("Not supported in SIMD-only mode"); 11935 } 11936 11937 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11938 SourceLocation Loc, 11939 OpenMPDirectiveKind Kind, 11940 bool EmitChecks, 11941 bool ForceSimpleCall) { 11942 llvm_unreachable("Not supported in SIMD-only mode"); 11943 } 11944 11945 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11946 CodeGenFunction &CGF, SourceLocation Loc, 11947 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11948 bool Ordered, const DispatchRTInput &DispatchValues) { 11949 llvm_unreachable("Not supported in SIMD-only mode"); 11950 } 11951 11952 void CGOpenMPSIMDRuntime::emitForStaticInit( 11953 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11954 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11955 llvm_unreachable("Not supported in SIMD-only mode"); 11956 } 11957 11958 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11959 CodeGenFunction &CGF, SourceLocation Loc, 11960 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 11961 llvm_unreachable("Not supported in SIMD-only mode"); 11962 } 11963 11964 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11965 SourceLocation Loc, 11966 unsigned IVSize, 11967 bool IVSigned) { 11968 llvm_unreachable("Not supported in SIMD-only mode"); 11969 } 11970 11971 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11972 SourceLocation Loc, 11973 OpenMPDirectiveKind DKind) { 11974 llvm_unreachable("Not supported in SIMD-only mode"); 11975 } 11976 11977 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11978 SourceLocation Loc, 11979 unsigned IVSize, bool IVSigned, 11980 Address IL, Address LB, 11981 Address UB, Address ST) { 11982 llvm_unreachable("Not supported in SIMD-only mode"); 11983 } 11984 11985 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11986 llvm::Value *NumThreads, 11987 SourceLocation Loc) { 11988 llvm_unreachable("Not supported in SIMD-only mode"); 11989 } 11990 11991 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11992 ProcBindKind ProcBind, 11993 SourceLocation Loc) { 11994 llvm_unreachable("Not supported in SIMD-only mode"); 11995 } 11996 11997 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11998 const VarDecl *VD, 11999 Address VDAddr, 12000 SourceLocation Loc) { 12001 llvm_unreachable("Not supported in SIMD-only mode"); 12002 } 12003 12004 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12005 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12006 CodeGenFunction *CGF) { 12007 llvm_unreachable("Not supported in SIMD-only mode"); 12008 } 12009 12010 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12011 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12012 llvm_unreachable("Not supported in SIMD-only mode"); 12013 } 12014 12015 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12016 ArrayRef<const Expr *> 
Vars, 12017 SourceLocation Loc, 12018 llvm::AtomicOrdering AO) { 12019 llvm_unreachable("Not supported in SIMD-only mode"); 12020 } 12021 12022 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12023 const OMPExecutableDirective &D, 12024 llvm::Function *TaskFunction, 12025 QualType SharedsTy, Address Shareds, 12026 const Expr *IfCond, 12027 const OMPTaskDataTy &Data) { 12028 llvm_unreachable("Not supported in SIMD-only mode"); 12029 } 12030 12031 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12032 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12033 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12034 const Expr *IfCond, const OMPTaskDataTy &Data) { 12035 llvm_unreachable("Not supported in SIMD-only mode"); 12036 } 12037 12038 void CGOpenMPSIMDRuntime::emitReduction( 12039 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12040 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12041 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12042 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12043 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12044 ReductionOps, Options); 12045 } 12046 12047 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12048 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12049 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12050 llvm_unreachable("Not supported in SIMD-only mode"); 12051 } 12052 12053 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12054 SourceLocation Loc, 12055 ReductionCodeGen &RCG, 12056 unsigned N) { 12057 llvm_unreachable("Not supported in SIMD-only mode"); 12058 } 12059 12060 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 12061 SourceLocation Loc, 12062 llvm::Value *ReductionsPtr, 12063 LValue SharedLVal) { 12064 llvm_unreachable("Not supported in 
SIMD-only mode"); 12065 } 12066 12067 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 12068 SourceLocation Loc) { 12069 llvm_unreachable("Not supported in SIMD-only mode"); 12070 } 12071 12072 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12073 CodeGenFunction &CGF, SourceLocation Loc, 12074 OpenMPDirectiveKind CancelRegion) { 12075 llvm_unreachable("Not supported in SIMD-only mode"); 12076 } 12077 12078 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12079 SourceLocation Loc, const Expr *IfCond, 12080 OpenMPDirectiveKind CancelRegion) { 12081 llvm_unreachable("Not supported in SIMD-only mode"); 12082 } 12083 12084 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12085 const OMPExecutableDirective &D, StringRef ParentName, 12086 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12087 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12088 llvm_unreachable("Not supported in SIMD-only mode"); 12089 } 12090 12091 void CGOpenMPSIMDRuntime::emitTargetCall( 12092 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12093 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12094 const Expr *Device, 12095 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12096 const OMPLoopDirective &D)> 12097 SizeEmitter) { 12098 llvm_unreachable("Not supported in SIMD-only mode"); 12099 } 12100 12101 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12102 llvm_unreachable("Not supported in SIMD-only mode"); 12103 } 12104 12105 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12106 llvm_unreachable("Not supported in SIMD-only mode"); 12107 } 12108 12109 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12110 return false; 12111 } 12112 12113 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12114 const OMPExecutableDirective &D, 12115 SourceLocation Loc, 12116 llvm::Function *OutlinedFn, 12117 ArrayRef<llvm::Value *> CapturedVars) { 12118 
llvm_unreachable("Not supported in SIMD-only mode"); 12119 } 12120 12121 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12122 const Expr *NumTeams, 12123 const Expr *ThreadLimit, 12124 SourceLocation Loc) { 12125 llvm_unreachable("Not supported in SIMD-only mode"); 12126 } 12127 12128 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12129 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12130 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12131 llvm_unreachable("Not supported in SIMD-only mode"); 12132 } 12133 12134 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12135 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12136 const Expr *Device) { 12137 llvm_unreachable("Not supported in SIMD-only mode"); 12138 } 12139 12140 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12141 const OMPLoopDirective &D, 12142 ArrayRef<Expr *> NumIterations) { 12143 llvm_unreachable("Not supported in SIMD-only mode"); 12144 } 12145 12146 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12147 const OMPDependClause *C) { 12148 llvm_unreachable("Not supported in SIMD-only mode"); 12149 } 12150 12151 const VarDecl * 12152 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12153 const VarDecl *NativeParam) const { 12154 llvm_unreachable("Not supported in SIMD-only mode"); 12155 } 12156 12157 Address 12158 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12159 const VarDecl *NativeParam, 12160 const VarDecl *TargetParam) const { 12161 llvm_unreachable("Not supported in SIMD-only mode"); 12162 } 12163