//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// It records the region kind, the code generation callback for the region,
/// the directive kind and the HasCancel flag. Derived classes must provide
/// the thread id variable via getThreadIDVariable().
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Creates region info bound to the captured statement \p CS.
  /// \param RegionKind Value later returned by getRegionKind().
  /// \param CodeGen Code generation callback stored for the region.
  /// \param Kind Value later returned by getDirectiveKind().
  /// \param HasCancel Value later returned by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Creates region info that is not bound to a captured statement; the
  /// remaining parameters have the same meaning as in the overload above.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting an untied-task switching point. The base
  /// implementation emits nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// \return The region kind given at construction.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \return The directive kind given at construction.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \return The HasCancel flag given at construction.
  bool hasCancel() const { return HasCancel; }

  /// Support for isa<>/cast<>/dyn_cast<>: matches every captured statement
  /// info whose kind is CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region.
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback supplied at construction.
  RegionCodeGenTy CodeGen;
  /// Directive kind supplied at construction.
  OpenMPDirectiveKind Kind;
  /// HasCancel flag supplied at construction.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
105 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 106 public: 107 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 108 const RegionCodeGenTy &CodeGen, 109 OpenMPDirectiveKind Kind, bool HasCancel, 110 StringRef HelperName) 111 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 112 HasCancel), 113 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 114 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 115 } 116 117 /// Get a variable or parameter for storing global thread id 118 /// inside OpenMP construct. 119 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 120 121 /// Get the name of the capture helper. 122 StringRef getHelperName() const override { return HelperName; } 123 124 static bool classof(const CGCapturedStmtInfo *Info) { 125 return CGOpenMPRegionInfo::classof(Info) && 126 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 127 ParallelOutlinedRegion; 128 } 129 130 private: 131 /// A variable or parameter storing global thread id for OpenMP 132 /// constructs. 133 const VarDecl *ThreadIDVar; 134 StringRef HelperName; 135 }; 136 137 /// API for captured statement code generation in OpenMP constructs. 138 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 139 public: 140 class UntiedTaskActionTy final : public PrePostActionTy { 141 bool Untied; 142 const VarDecl *PartIDVar; 143 const RegionCodeGenTy UntiedCodeGen; 144 llvm::SwitchInst *UntiedSwitch = nullptr; 145 146 public: 147 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 148 const RegionCodeGenTy &UntiedCodeGen) 149 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 150 void Enter(CodeGenFunction &CGF) override { 151 if (Untied) { 152 // Emit task switching point. 
153 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 154 CGF.GetAddrOfLocalVar(PartIDVar), 155 PartIDVar->getType()->castAs<PointerType>()); 156 llvm::Value *Res = 157 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 158 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 159 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 160 CGF.EmitBlock(DoneBB); 161 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 162 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 163 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 164 CGF.Builder.GetInsertBlock()); 165 emitUntiedSwitch(CGF); 166 } 167 } 168 void emitUntiedSwitch(CodeGenFunction &CGF) const { 169 if (Untied) { 170 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 171 CGF.GetAddrOfLocalVar(PartIDVar), 172 PartIDVar->getType()->castAs<PointerType>()); 173 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 PartIdLVal); 175 UntiedCodeGen(CGF); 176 CodeGenFunction::JumpDest CurPoint = 177 CGF.getJumpDestInCurrentScope(".untied.next."); 178 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 179 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 180 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 181 CGF.Builder.GetInsertBlock()); 182 CGF.EmitBranchThroughCleanup(CurPoint); 183 CGF.EmitBlock(CurPoint.getBlock()); 184 } 185 } 186 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 187 }; 188 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 189 const VarDecl *ThreadIDVar, 190 const RegionCodeGenTy &CodeGen, 191 OpenMPDirectiveKind Kind, bool HasCancel, 192 const UntiedTaskActionTy &Action) 193 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 194 ThreadIDVar(ThreadIDVar), Action(Action) { 195 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 196 } 197 198 /// Get a variable or parameter for storing global thread id 199 /// inside OpenMP construct. 
200 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 201 202 /// Get an LValue for the current ThreadID variable. 203 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 204 205 /// Get the name of the capture helper. 206 StringRef getHelperName() const override { return ".omp_outlined."; } 207 208 void emitUntiedSwitch(CodeGenFunction &CGF) override { 209 Action.emitUntiedSwitch(CGF); 210 } 211 212 static bool classof(const CGCapturedStmtInfo *Info) { 213 return CGOpenMPRegionInfo::classof(Info) && 214 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 215 TaskOutlinedRegion; 216 } 217 218 private: 219 /// A variable or parameter storing global thread id for OpenMP 220 /// constructs. 221 const VarDecl *ThreadIDVar; 222 /// Action for emitting code for untied tasks. 223 const UntiedTaskActionTy &Action; 224 }; 225 226 /// API for inlined captured statement code generation in OpenMP 227 /// constructs. 228 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 229 public: 230 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 231 const RegionCodeGenTy &CodeGen, 232 OpenMPDirectiveKind Kind, bool HasCancel) 233 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 234 OldCSI(OldCSI), 235 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 236 237 // Retrieve the value of the context parameter. 238 llvm::Value *getContextValue() const override { 239 if (OuterRegionInfo) 240 return OuterRegionInfo->getContextValue(); 241 llvm_unreachable("No context value for inlined OpenMP region"); 242 } 243 244 void setContextValue(llvm::Value *V) override { 245 if (OuterRegionInfo) { 246 OuterRegionInfo->setContextValue(V); 247 return; 248 } 249 llvm_unreachable("No context value for inlined OpenMP region"); 250 } 251 252 /// Lookup the captured field decl for a variable. 
253 const FieldDecl *lookup(const VarDecl *VD) const override { 254 if (OuterRegionInfo) 255 return OuterRegionInfo->lookup(VD); 256 // If there is no outer outlined region,no need to lookup in a list of 257 // captured variables, we can use the original one. 258 return nullptr; 259 } 260 261 FieldDecl *getThisFieldDecl() const override { 262 if (OuterRegionInfo) 263 return OuterRegionInfo->getThisFieldDecl(); 264 return nullptr; 265 } 266 267 /// Get a variable or parameter for storing global thread id 268 /// inside OpenMP construct. 269 const VarDecl *getThreadIDVariable() const override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariable(); 272 return nullptr; 273 } 274 275 /// Get an LValue for the current ThreadID variable. 276 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 277 if (OuterRegionInfo) 278 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 279 llvm_unreachable("No LValue for inlined OpenMP construct"); 280 } 281 282 /// Get the name of the capture helper. 283 StringRef getHelperName() const override { 284 if (auto *OuterRegionInfo = getOldCSI()) 285 return OuterRegionInfo->getHelperName(); 286 llvm_unreachable("No helper name for inlined OpenMP construct"); 287 } 288 289 void emitUntiedSwitch(CodeGenFunction &CGF) override { 290 if (OuterRegionInfo) 291 OuterRegionInfo->emitUntiedSwitch(CGF); 292 } 293 294 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 295 296 static bool classof(const CGCapturedStmtInfo *Info) { 297 return CGOpenMPRegionInfo::classof(Info) && 298 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 299 } 300 301 ~CGOpenMPInlinedRegionInfo() override = default; 302 303 private: 304 /// CodeGen info about outer OpenMP region. 305 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 306 CGOpenMPRegionInfo *OuterRegionInfo; 307 }; 308 309 /// API for captured statement code generation in OpenMP target 310 /// constructs. 
For this captures, implicit parameters are used instead of the 311 /// captured fields. The name of the target region has to be unique in a given 312 /// application so it is provided by the client, because only the client has 313 /// the information to generate that. 314 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 315 public: 316 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 317 const RegionCodeGenTy &CodeGen, StringRef HelperName) 318 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 319 /*HasCancel=*/false), 320 HelperName(HelperName) {} 321 322 /// This is unused for target regions because each starts executing 323 /// with a single thread. 324 const VarDecl *getThreadIDVariable() const override { return nullptr; } 325 326 /// Get the name of the capture helper. 327 StringRef getHelperName() const override { return HelperName; } 328 329 static bool classof(const CGCapturedStmtInfo *Info) { 330 return CGOpenMPRegionInfo::classof(Info) && 331 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 332 } 333 334 private: 335 StringRef HelperName; 336 }; 337 338 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 339 llvm_unreachable("No codegen for expressions"); 340 } 341 /// API for generation of expressions captured in a innermost OpenMP 342 /// region. 343 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 344 public: 345 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 346 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 347 OMPD_unknown, 348 /*HasCancel=*/false), 349 PrivScope(CGF) { 350 // Make sure the globals captured in the provided statement are local by 351 // using the privatization logic. We assume the same variable is not 352 // captured more than once. 
353 for (const auto &C : CS.captures()) { 354 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 355 continue; 356 357 const VarDecl *VD = C.getCapturedVar(); 358 if (VD->isLocalVarDeclOrParm()) 359 continue; 360 361 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 362 /*RefersToEnclosingVariableOrCapture=*/false, 363 VD->getType().getNonReferenceType(), VK_LValue, 364 C.getLocation()); 365 PrivScope.addPrivate( 366 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 367 } 368 (void)PrivScope.Privatize(); 369 } 370 371 /// Lookup the captured field decl for a variable. 372 const FieldDecl *lookup(const VarDecl *VD) const override { 373 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 374 return FD; 375 return nullptr; 376 } 377 378 /// Emit the captured statement body. 379 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 380 llvm_unreachable("No body for expressions"); 381 } 382 383 /// Get a variable or parameter for storing global thread id 384 /// inside OpenMP construct. 385 const VarDecl *getThreadIDVariable() const override { 386 llvm_unreachable("No thread id for expressions"); 387 } 388 389 /// Get the name of the capture helper. 390 StringRef getHelperName() const override { 391 llvm_unreachable("No helper name for expressions"); 392 } 393 394 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 395 396 private: 397 /// Private scope to capture global variables. 398 CodeGenFunction::OMPPrivateScope PrivScope; 399 }; 400 401 /// RAII for emitting code of OpenMP constructs. 402 class InlinedOpenMPRegionRAII { 403 CodeGenFunction &CGF; 404 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 405 FieldDecl *LambdaThisCaptureField = nullptr; 406 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 407 408 public: 409 /// Constructs region for combined constructs. 410 /// \param CodeGen Code generation sequence for combined directives. 
Includes 411 /// a list of functions used for code generation of implicitly inlined 412 /// regions. 413 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 414 OpenMPDirectiveKind Kind, bool HasCancel) 415 : CGF(CGF) { 416 // Start emission for the construct. 417 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 418 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 419 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 420 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 421 CGF.LambdaThisCaptureField = nullptr; 422 BlockInfo = CGF.BlockInfo; 423 CGF.BlockInfo = nullptr; 424 } 425 426 ~InlinedOpenMPRegionRAII() { 427 // Restore original CapturedStmtInfo only if we're done with code emission. 428 auto *OldCSI = 429 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 430 delete CGF.CapturedStmtInfo; 431 CGF.CapturedStmtInfo = OldCSI; 432 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 433 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 434 CGF.BlockInfo = BlockInfo; 435 } 436 }; 437 438 /// Values for bit flags used in the ident_t to describe the fields. 439 /// All enumeric elements are named and described in accordance with the code 440 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 441 enum OpenMPLocationFlags : unsigned { 442 /// Use trampoline for internal microtask. 443 OMP_IDENT_IMD = 0x01, 444 /// Use c-style ident structure. 445 OMP_IDENT_KMPC = 0x02, 446 /// Atomic reduction option for kmpc_reduce. 447 OMP_ATOMIC_REDUCE = 0x10, 448 /// Explicit 'barrier' directive. 449 OMP_IDENT_BARRIER_EXPL = 0x20, 450 /// Implicit barrier in code. 451 OMP_IDENT_BARRIER_IMPL = 0x40, 452 /// Implicit barrier in 'for' directive. 453 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 454 /// Implicit barrier in 'sections' directive. 455 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 456 /// Implicit barrier in 'single' directive. 
457 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 458 /// Call of __kmp_for_static_init for static loop. 459 OMP_IDENT_WORK_LOOP = 0x200, 460 /// Call of __kmp_for_static_init for sections. 461 OMP_IDENT_WORK_SECTIONS = 0x400, 462 /// Call of __kmp_for_static_init for distribute. 463 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 464 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 465 }; 466 467 namespace { 468 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 469 /// Values for bit flags for marking which requires clauses have been used. 470 enum OpenMPOffloadingRequiresDirFlags : int64_t { 471 /// flag undefined. 472 OMP_REQ_UNDEFINED = 0x000, 473 /// no requires clause present. 474 OMP_REQ_NONE = 0x001, 475 /// reverse_offload clause. 476 OMP_REQ_REVERSE_OFFLOAD = 0x002, 477 /// unified_address clause. 478 OMP_REQ_UNIFIED_ADDRESS = 0x004, 479 /// unified_shared_memory clause. 480 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 481 /// dynamic_allocators clause. 482 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 483 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 484 }; 485 486 enum OpenMPOffloadingReservedDeviceIDs { 487 /// Device ID if the device was not defined, runtime should get it 488 /// from environment variables in the spec. 489 OMP_DEVICEID_UNDEF = -1, 490 }; 491 } // anonymous namespace 492 493 /// Describes ident structure that describes a source location. 
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule: an alias for OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the runtime library functions (__kmpc_* and __tgt_*
/// entry points). The comment on each enumerator gives the prototype of the
/// corresponding runtime call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
///
/// When the cleanup is emitted it calls Exit() on the stored action, unless
/// the function no longer has an insertion point.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit() is invoked by Emit(); not owned by this cleanup.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit into if there is no current insertion point.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Runs the stored code generation callback inside its own cleanups scope.
///
/// If a pre/post action is attached, a CleanupTy for it is pushed (as a
/// normal-and-EH cleanup) before the callback runs, so that the action's
/// Exit() is emitted with the scope's cleanups. Otherwise the callback
/// receives a default-constructed PrePostActionTy.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
794 static const OMPDeclareReductionDecl * 795 getReductionInit(const Expr *ReductionOp) { 796 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 797 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 798 if (const auto *DRE = 799 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 800 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 801 return DRD; 802 return nullptr; 803 } 804 805 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 806 const OMPDeclareReductionDecl *DRD, 807 const Expr *InitOp, 808 Address Private, Address Original, 809 QualType Ty) { 810 if (DRD->getInitializer()) { 811 std::pair<llvm::Function *, llvm::Function *> Reduction = 812 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 813 const auto *CE = cast<CallExpr>(InitOp); 814 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 815 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 816 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 817 const auto *LHSDRE = 818 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 819 const auto *RHSDRE = 820 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 821 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 822 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 823 [=]() { return Private; }); 824 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 825 [=]() { return Original; }); 826 (void)PrivateScope.Privatize(); 827 RValue Func = RValue::get(Reduction.second); 828 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 829 CGF.EmitIgnoredExpr(InitOp); 830 } else { 831 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 832 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 833 auto *GV = new llvm::GlobalVariable( 834 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 835 llvm::GlobalValue::PrivateLinkage, Init, Name); 836 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 837 RValue InitRVal; 
838 switch (CGF.getEvaluationKind(Ty)) { 839 case TEK_Scalar: 840 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 841 break; 842 case TEK_Complex: 843 InitRVal = 844 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 845 break; 846 case TEK_Aggregate: 847 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 848 break; 849 } 850 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 851 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 852 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 853 /*IsInitializer=*/false); 854 } 855 } 856 857 /// Emit initialization of arrays of complex types. 858 /// \param DestAddr Address of the array. 859 /// \param Type Type of array. 860 /// \param Init Initial expression of array. 861 /// \param SrcAddr Address of the original array. 862 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 863 QualType Type, bool EmitDeclareReductionInit, 864 const Expr *Init, 865 const OMPDeclareReductionDecl *DRD, 866 Address SrcAddr = Address::invalid()) { 867 // Perform element-by-element initialization. 868 QualType ElementTy; 869 870 // Drill down to the base element type on both arrays. 871 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 872 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 873 DestAddr = 874 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 875 if (DRD) 876 SrcAddr = 877 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 878 879 llvm::Value *SrcBegin = nullptr; 880 if (DRD) 881 SrcBegin = SrcAddr.getPointer(); 882 llvm::Value *DestBegin = DestAddr.getPointer(); 883 // Cast from pointer to array type to pointer to single element. 884 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 885 // The basic structure here is a while-do loop. 
886 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 887 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 888 llvm::Value *IsEmpty = 889 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 890 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 891 892 // Enter the loop body, making that address the current address. 893 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 894 CGF.EmitBlock(BodyBB); 895 896 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 897 898 llvm::PHINode *SrcElementPHI = nullptr; 899 Address SrcElementCurrent = Address::invalid(); 900 if (DRD) { 901 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 902 "omp.arraycpy.srcElementPast"); 903 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 904 SrcElementCurrent = 905 Address(SrcElementPHI, 906 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 907 } 908 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 909 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 910 DestElementPHI->addIncoming(DestBegin, EntryBB); 911 Address DestElementCurrent = 912 Address(DestElementPHI, 913 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 914 915 // Emit copy. 916 { 917 CodeGenFunction::RunCleanupsScope InitScope(CGF); 918 if (EmitDeclareReductionInit) { 919 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 920 SrcElementCurrent, ElementTy); 921 } else 922 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 923 /*IsInitializer=*/false); 924 } 925 926 if (DRD) { 927 // Shift the address forward by one element. 928 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 929 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 930 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 931 } 932 933 // Shift the address forward by one element. 
934 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 935 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 936 // Check whether we've reached the end. 937 llvm::Value *Done = 938 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 939 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 940 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 941 942 // Done. 943 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 944 } 945 946 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 947 return CGF.EmitOMPSharedLValue(E); 948 } 949 950 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 951 const Expr *E) { 952 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 953 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 954 return LValue(); 955 } 956 957 void ReductionCodeGen::emitAggregateInitialization( 958 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 959 const OMPDeclareReductionDecl *DRD) { 960 // Emit VarDecl with copy init for arrays. 961 // Get the address of the original variable captured in current 962 // captured region. 963 const auto *PrivateVD = 964 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 965 bool EmitDeclareReductionInit = 966 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 967 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 968 EmitDeclareReductionInit, 969 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 970 : PrivateVD->getInit(), 971 DRD, SharedLVal.getAddress(CGF)); 972 } 973 974 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 975 ArrayRef<const Expr *> Privates, 976 ArrayRef<const Expr *> ReductionOps) { 977 ClausesData.reserve(Shareds.size()); 978 SharedAddresses.reserve(Shareds.size()); 979 Sizes.reserve(Shareds.size()); 980 BaseDecls.reserve(Shareds.size()); 981 auto IPriv = Privates.begin(); 982 auto IRed = ReductionOps.begin(); 983 for (const Expr *Ref : Shareds) { 984 ClausesData.emplace_back(Ref, *IPriv, *IRed); 985 std::advance(IPriv, 1); 986 std::advance(IRed, 1); 987 } 988 } 989 990 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 991 assert(SharedAddresses.size() == N && 992 "Number of generated lvalues must be exactly N."); 993 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 994 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 995 SharedAddresses.emplace_back(First, Second); 996 } 997 998 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 999 const auto *PrivateVD = 1000 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1001 QualType PrivateType = PrivateVD->getType(); 1002 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1003 if (!PrivateType->isVariablyModifiedType()) { 1004 Sizes.emplace_back( 1005 CGF.getTypeSize( 1006 SharedAddresses[N].first.getType().getNonReferenceType()), 1007 nullptr); 1008 return; 1009 } 1010 llvm::Value *Size; 1011 llvm::Value *SizeInChars; 1012 auto *ElemType = cast<llvm::PointerType>( 1013 SharedAddresses[N].first.getPointer(CGF)->getType()) 1014 ->getElementType(); 1015 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1016 if (AsArraySection) { 1017 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), 1018 SharedAddresses[N].first.getPointer(CGF)); 1019 Size = CGF.Builder.CreateNUWAdd( 1020 Size, 
llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 1021 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1022 } else { 1023 SizeInChars = CGF.getTypeSize( 1024 SharedAddresses[N].first.getType().getNonReferenceType()); 1025 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1026 } 1027 Sizes.emplace_back(SizeInChars, Size); 1028 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1029 CGF, 1030 cast<OpaqueValueExpr>( 1031 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1032 RValue::get(Size)); 1033 CGF.EmitVariablyModifiedType(PrivateType); 1034 } 1035 1036 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1037 llvm::Value *Size) { 1038 const auto *PrivateVD = 1039 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1040 QualType PrivateType = PrivateVD->getType(); 1041 if (!PrivateType->isVariablyModifiedType()) { 1042 assert(!Size && !Sizes[N].second && 1043 "Size should be nullptr for non-variably modified reduction " 1044 "items."); 1045 return; 1046 } 1047 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1048 CGF, 1049 cast<OpaqueValueExpr>( 1050 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1051 RValue::get(Size)); 1052 CGF.EmitVariablyModifiedType(PrivateType); 1053 } 1054 1055 void ReductionCodeGen::emitInitialization( 1056 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1057 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1058 assert(SharedAddresses.size() > N && "No variable was generated"); 1059 const auto *PrivateVD = 1060 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1061 const OMPDeclareReductionDecl *DRD = 1062 getReductionInit(ClausesData[N].ReductionOp); 1063 QualType PrivateType = PrivateVD->getType(); 1064 PrivateAddr = CGF.Builder.CreateElementBitCast( 1065 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1066 QualType SharedType = SharedAddresses[N].first.getType(); 1067 SharedLVal = 
CGF.MakeAddrLValue( 1068 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 1069 CGF.ConvertTypeForMem(SharedType)), 1070 SharedType, SharedAddresses[N].first.getBaseInfo(), 1071 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1072 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1073 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1074 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1075 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1076 PrivateAddr, SharedLVal.getAddress(CGF), 1077 SharedLVal.getType()); 1078 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1079 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1080 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1081 PrivateVD->getType().getQualifiers(), 1082 /*IsInitializer=*/false); 1083 } 1084 } 1085 1086 bool ReductionCodeGen::needCleanups(unsigned N) { 1087 const auto *PrivateVD = 1088 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1089 QualType PrivateType = PrivateVD->getType(); 1090 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1091 return DTorKind != QualType::DK_none; 1092 } 1093 1094 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1095 Address PrivateAddr) { 1096 const auto *PrivateVD = 1097 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1098 QualType PrivateType = PrivateVD->getType(); 1099 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1100 if (needCleanups(N)) { 1101 PrivateAddr = CGF.Builder.CreateElementBitCast( 1102 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1103 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1104 } 1105 } 1106 1107 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1108 LValue BaseLV) { 1109 BaseTy = BaseTy.getNonReferenceType(); 1110 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 
1111 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1112 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1113 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 1114 } else { 1115 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 1116 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1117 } 1118 BaseTy = BaseTy->getPointeeType(); 1119 } 1120 return CGF.MakeAddrLValue( 1121 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 1122 CGF.ConvertTypeForMem(ElTy)), 1123 BaseLV.getType(), BaseLV.getBaseInfo(), 1124 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1125 } 1126 1127 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1128 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1129 llvm::Value *Addr) { 1130 Address Tmp = Address::invalid(); 1131 Address TopTmp = Address::invalid(); 1132 Address MostTopTmp = Address::invalid(); 1133 BaseTy = BaseTy.getNonReferenceType(); 1134 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1135 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1136 Tmp = CGF.CreateMemTemp(BaseTy); 1137 if (TopTmp.isValid()) 1138 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1139 else 1140 MostTopTmp = Tmp; 1141 TopTmp = Tmp; 1142 BaseTy = BaseTy->getPointeeType(); 1143 } 1144 llvm::Type *Ty = BaseLVType; 1145 if (Tmp.isValid()) 1146 Ty = Tmp.getElementType(); 1147 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1148 if (Tmp.isValid()) { 1149 CGF.Builder.CreateStore(Addr, Tmp); 1150 return MostTopTmp; 1151 } 1152 return Address(Addr, BaseLVAlignment); 1153 } 1154 1155 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1156 const VarDecl *OrigVD = nullptr; 1157 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1158 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1159 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1160 Base = 
TempOASE->getBase()->IgnoreParenImpCasts(); 1161 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1162 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1163 DE = cast<DeclRefExpr>(Base); 1164 OrigVD = cast<VarDecl>(DE->getDecl()); 1165 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1166 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1167 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1168 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1169 DE = cast<DeclRefExpr>(Base); 1170 OrigVD = cast<VarDecl>(DE->getDecl()); 1171 } 1172 return OrigVD; 1173 } 1174 1175 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1176 Address PrivateAddr) { 1177 const DeclRefExpr *DE; 1178 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1179 BaseDecls.emplace_back(OrigVD); 1180 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1181 LValue BaseLValue = 1182 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1183 OriginalBaseLValue); 1184 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1185 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1186 llvm::Value *PrivatePointer = 1187 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1188 PrivateAddr.getPointer(), 1189 SharedAddresses[N].first.getAddress(CGF).getType()); 1190 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1191 return castToBase(CGF, OrigVD->getType(), 1192 SharedAddresses[N].first.getType(), 1193 OriginalBaseLValue.getAddress(CGF).getType(), 1194 OriginalBaseLValue.getAlignment(), Ptr); 1195 } 1196 BaseDecls.emplace_back( 1197 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1198 return PrivateAddr; 1199 } 1200 1201 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1202 const OMPDeclareReductionDecl *DRD = 1203 getReductionInit(ClausesData[N].ReductionOp); 1204 return DRD && DRD->getInitializer(); 1205 } 1206 1207 
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1208 return CGF.EmitLoadOfPointerLValue( 1209 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1210 getThreadIDVariable()->getType()->castAs<PointerType>()); 1211 } 1212 1213 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1214 if (!CGF.HaveInsertPoint()) 1215 return; 1216 // 1.2.2 OpenMP Language Terminology 1217 // Structured block - An executable statement with a single entry at the 1218 // top and a single exit at the bottom. 1219 // The point of exit cannot be a branch out of the structured block. 1220 // longjmp() and throw() must not violate the entry/exit criteria. 1221 CGF.EHStack.pushTerminate(); 1222 CodeGen(CGF); 1223 CGF.EHStack.popTerminate(); 1224 } 1225 1226 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1227 CodeGenFunction &CGF) { 1228 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1229 getThreadIDVariable()->getType(), 1230 AlignmentSource::Decl); 1231 } 1232 1233 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1234 QualType FieldTy) { 1235 auto *Field = FieldDecl::Create( 1236 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1237 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1238 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1239 Field->setAccess(AS_public); 1240 DC->addDecl(Field); 1241 return Field; 1242 } 1243 1244 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1245 StringRef Separator) 1246 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1247 OffloadEntriesInfoManager(CGM) { 1248 ASTContext &C = CGM.getContext(); 1249 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1250 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1251 RD->startDefinition(); 1252 // reserved_1 1253 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1254 // flags 1255 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 1256 // reserved_2 1257 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1258 // reserved_3 1259 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1260 // psource 1261 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1262 RD->completeDefinition(); 1263 IdentQTy = C.getRecordType(RD); 1264 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1265 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1266 1267 loadOffloadInfoMetadata(); 1268 } 1269 1270 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, 1271 const GlobalDecl &OldGD, 1272 llvm::GlobalValue *OrigAddr, 1273 bool IsForDefinition) { 1274 // Emit at least a definition for the aliasee if the the address of the 1275 // original function is requested. 1276 if (IsForDefinition || OrigAddr) 1277 (void)CGM.GetAddrOfGlobal(NewGD); 1278 StringRef NewMangledName = CGM.getMangledName(NewGD); 1279 llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); 1280 if (Addr && !Addr->isDeclaration()) { 1281 const auto *D = cast<FunctionDecl>(OldGD.getDecl()); 1282 const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD); 1283 llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); 1284 1285 // Create a reference to the named value. This ensures that it is emitted 1286 // if a deferred decl. 1287 llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); 1288 1289 // Create the new alias itself, but don't set a name yet. 
1290 auto *GA = 1291 llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); 1292 1293 if (OrigAddr) { 1294 assert(OrigAddr->isDeclaration() && "Expected declaration"); 1295 1296 GA->takeName(OrigAddr); 1297 OrigAddr->replaceAllUsesWith( 1298 llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); 1299 OrigAddr->eraseFromParent(); 1300 } else { 1301 GA->setName(CGM.getMangledName(OldGD)); 1302 } 1303 1304 // Set attributes which are particular to an alias; this is a 1305 // specialization of the attributes which may be set on a global function. 1306 if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || 1307 D->isWeakImported()) 1308 GA->setLinkage(llvm::Function::WeakAnyLinkage); 1309 1310 CGM.SetCommonAttributes(OldGD, GA); 1311 return true; 1312 } 1313 return false; 1314 } 1315 1316 void CGOpenMPRuntime::clear() { 1317 InternalVars.clear(); 1318 // Clean non-target variable declarations possibly used only in debug info. 1319 for (const auto &Data : EmittedNonTargetVariables) { 1320 if (!Data.getValue().pointsToAliveValue()) 1321 continue; 1322 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1323 if (!GV) 1324 continue; 1325 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1326 continue; 1327 GV->eraseFromParent(); 1328 } 1329 // Emit aliases for the deferred aliasees. 1330 for (const auto &Pair : DeferredVariantFunction) { 1331 StringRef MangledName = CGM.getMangledName(Pair.second.second); 1332 llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); 1333 // If not able to emit alias, just emit original declaration. 
1334 (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, 1335 /*IsForDefinition=*/false); 1336 } 1337 } 1338 1339 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1340 SmallString<128> Buffer; 1341 llvm::raw_svector_ostream OS(Buffer); 1342 StringRef Sep = FirstSeparator; 1343 for (StringRef Part : Parts) { 1344 OS << Sep << Part; 1345 Sep = Separator; 1346 } 1347 return std::string(OS.str()); 1348 } 1349 1350 static llvm::Function * 1351 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1352 const Expr *CombinerInitializer, const VarDecl *In, 1353 const VarDecl *Out, bool IsCombiner) { 1354 // void .omp_combiner.(Ty *in, Ty *out); 1355 ASTContext &C = CGM.getContext(); 1356 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1357 FunctionArgList Args; 1358 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1359 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1360 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1361 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1362 Args.push_back(&OmpOutParm); 1363 Args.push_back(&OmpInParm); 1364 const CGFunctionInfo &FnInfo = 1365 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1366 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1367 std::string Name = CGM.getOpenMPRuntime().getName( 1368 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1369 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1370 Name, &CGM.getModule()); 1371 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1372 if (CGM.getLangOpts().Optimize) { 1373 Fn->removeFnAttr(llvm::Attribute::NoInline); 1374 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1375 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1376 } 1377 CodeGenFunction CGF(CGM); 1378 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1379 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
1380 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1381 Out->getLocation()); 1382 CodeGenFunction::OMPPrivateScope Scope(CGF); 1383 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1384 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1385 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1386 .getAddress(CGF); 1387 }); 1388 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1389 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1390 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1391 .getAddress(CGF); 1392 }); 1393 (void)Scope.Privatize(); 1394 if (!IsCombiner && Out->hasInit() && 1395 !CGF.isTrivialInitializer(Out->getInit())) { 1396 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1397 Out->getType().getQualifiers(), 1398 /*IsInitializer=*/true); 1399 } 1400 if (CombinerInitializer) 1401 CGF.EmitIgnoredExpr(CombinerInitializer); 1402 Scope.ForceCleanup(); 1403 CGF.FinishFunction(); 1404 return Fn; 1405 } 1406 1407 void CGOpenMPRuntime::emitUserDefinedReduction( 1408 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1409 if (UDRMap.count(D) > 0) 1410 return; 1411 llvm::Function *Combiner = emitCombinerOrInitializer( 1412 CGM, D->getType(), D->getCombiner(), 1413 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1414 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1415 /*IsCombiner=*/true); 1416 llvm::Function *Initializer = nullptr; 1417 if (const Expr *Init = D->getInitializer()) { 1418 Initializer = emitCombinerOrInitializer( 1419 CGM, D->getType(), 1420 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? 
Init 1421 : nullptr, 1422 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1423 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1424 /*IsCombiner=*/false); 1425 } 1426 UDRMap.try_emplace(D, Combiner, Initializer); 1427 if (CGF) { 1428 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1429 Decls.second.push_back(D); 1430 } 1431 } 1432 1433 std::pair<llvm::Function *, llvm::Function *> 1434 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1435 auto I = UDRMap.find(D); 1436 if (I != UDRMap.end()) 1437 return I->second; 1438 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1439 return UDRMap.lookup(D); 1440 } 1441 1442 namespace { 1443 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1444 // Builder if one is present. 1445 struct PushAndPopStackRAII { 1446 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1447 bool HasCancel) 1448 : OMPBuilder(OMPBuilder) { 1449 if (!OMPBuilder) 1450 return; 1451 1452 // The following callback is the crucial part of clangs cleanup process. 1453 // 1454 // NOTE: 1455 // Once the OpenMPIRBuilder is used to create parallel regions (and 1456 // similar), the cancellation destination (Dest below) is determined via 1457 // IP. That means if we have variables to finalize we split the block at IP, 1458 // use the new block (=BB) as destination to build a JumpDest (via 1459 // getJumpDestInCurrentScope(BB)) which then is fed to 1460 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1461 // to push & pop an FinalizationInfo object. 1462 // The FiniCB will still be needed but at the point where the 1463 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 
1464 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1465 assert(IP.getBlock()->end() == IP.getPoint() && 1466 "Clang CG should cause non-terminated block!"); 1467 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1468 CGF.Builder.restoreIP(IP); 1469 CodeGenFunction::JumpDest Dest = 1470 CGF.getOMPCancelDestination(OMPD_parallel); 1471 CGF.EmitBranchThroughCleanup(Dest); 1472 }; 1473 1474 // TODO: Remove this once we emit parallel regions through the 1475 // OpenMPIRBuilder as it can do this setup internally. 1476 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1477 {FiniCB, OMPD_parallel, HasCancel}); 1478 OMPBuilder->pushFinalizationCB(std::move(FI)); 1479 } 1480 ~PushAndPopStackRAII() { 1481 if (OMPBuilder) 1482 OMPBuilder->popFinalizationCB(); 1483 } 1484 llvm::OpenMPIRBuilder *OMPBuilder; 1485 }; 1486 } // namespace 1487 1488 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1489 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1490 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1491 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1492 assert(ThreadIDVar->getType()->isPointerType() && 1493 "thread id variable must be of type kmp_int32 *"); 1494 CodeGenFunction CGF(CGM, true); 1495 bool HasCancel = false; 1496 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1497 HasCancel = OPD->hasCancel(); 1498 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1499 HasCancel = OPSD->hasCancel(); 1500 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1501 HasCancel = OPFD->hasCancel(); 1502 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1503 HasCancel = OPFD->hasCancel(); 1504 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1505 HasCancel = OPFD->hasCancel(); 1506 else if (const auto *OPFD = 1507 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1508 HasCancel = OPFD->hasCancel(); 1509 
else if (const auto *OPFD = 1510 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1511 HasCancel = OPFD->hasCancel(); 1512 1513 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1514 // parallel region to make cancellation barriers work properly. 1515 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1516 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1517 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1518 HasCancel, OutlinedHelperName); 1519 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1520 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1521 } 1522 1523 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1524 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1525 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1526 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1527 return emitParallelOrTeamsOutlinedFunction( 1528 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1529 } 1530 1531 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1532 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1533 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1534 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1535 return emitParallelOrTeamsOutlinedFunction( 1536 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1537 } 1538 1539 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1540 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1541 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1542 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1543 bool Tied, unsigned &NumberOfParts) { 1544 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1545 PrePostActionTy &) { 1546 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1547 llvm::Value *UpLoc = 
emitUpdateLocation(CGF, D.getBeginLoc()); 1548 llvm::Value *TaskArgs[] = { 1549 UpLoc, ThreadID, 1550 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1551 TaskTVar->getType()->castAs<PointerType>()) 1552 .getPointer(CGF)}; 1553 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1554 }; 1555 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1556 UntiedCodeGen); 1557 CodeGen.setAction(Action); 1558 assert(!ThreadIDVar->getType()->isPointerType() && 1559 "thread id variable must be of type kmp_int32 for tasks"); 1560 const OpenMPDirectiveKind Region = 1561 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1562 : OMPD_task; 1563 const CapturedStmt *CS = D.getCapturedStmt(Region); 1564 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1565 CodeGenFunction CGF(CGM, true); 1566 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1567 InnermostKind, 1568 TD ? TD->hasCancel() : false, Action); 1569 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1570 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1571 if (!Tied) 1572 NumberOfParts = Action.getNumberOfParts(); 1573 return Res; 1574 } 1575 1576 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1577 const RecordDecl *RD, const CGRecordLayout &RL, 1578 ArrayRef<llvm::Constant *> Data) { 1579 llvm::StructType *StructTy = RL.getLLVMType(); 1580 unsigned PrevIdx = 0; 1581 ConstantInitBuilder CIBuilder(CGM); 1582 auto DI = Data.begin(); 1583 for (const FieldDecl *FD : RD->fields()) { 1584 unsigned Idx = RL.getLLVMFieldNo(FD); 1585 // Fill the alignment. 1586 for (unsigned I = PrevIdx; I < Idx; ++I) 1587 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1588 PrevIdx = Idx + 1; 1589 Fields.add(*DI); 1590 ++DI; 1591 } 1592 } 1593 1594 template <class... 
As> 1595 static llvm::GlobalVariable * 1596 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1597 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1598 As &&... Args) { 1599 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1600 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1601 ConstantInitBuilder CIBuilder(CGM); 1602 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1603 buildStructValue(Fields, CGM, RD, RL, Data); 1604 return Fields.finishAndCreateGlobal( 1605 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1606 std::forward<As>(Args)...); 1607 } 1608 1609 template <typename T> 1610 static void 1611 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1612 ArrayRef<llvm::Constant *> Data, 1613 T &Parent) { 1614 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1615 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1616 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1617 buildStructValue(Fields, CGM, RD, RL, Data); 1618 Fields.finishAndAddTo(Parent); 1619 } 1620 1621 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1622 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1623 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1624 FlagsTy FlagsKey(Flags, Reserved2Flags); 1625 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1626 if (!Entry) { 1627 if (!DefaultOpenMPPSource) { 1628 // Initialize default location for psource field of ident_t structure of 1629 // all ident_t objects. Format is ";file;function;line;column;;". 
1630 // Taken from 1631 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1632 DefaultOpenMPPSource = 1633 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1634 DefaultOpenMPPSource = 1635 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1636 } 1637 1638 llvm::Constant *Data[] = { 1639 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1640 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1641 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1642 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1643 llvm::GlobalValue *DefaultOpenMPLocation = 1644 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1645 llvm::GlobalValue::PrivateLinkage); 1646 DefaultOpenMPLocation->setUnnamedAddr( 1647 llvm::GlobalValue::UnnamedAddr::Global); 1648 1649 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1650 } 1651 return Address(Entry, Align); 1652 } 1653 1654 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1655 bool AtCurrentPoint) { 1656 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1657 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1658 1659 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1660 if (AtCurrentPoint) { 1661 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1662 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1663 } else { 1664 Elem.second.ServiceInsertPt = 1665 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1666 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1667 } 1668 } 1669 1670 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1671 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1672 if (Elem.second.ServiceInsertPt) { 1673 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1674 Elem.second.ServiceInsertPt = nullptr; 1675 Ptr->eraseFromParent(); 1676 } 1677 } 1678 1679 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1680 SourceLocation Loc, 1681 unsigned Flags) { 1682 Flags |= OMP_IDENT_KMPC; 1683 // If no debug info is generated - return global default location. 1684 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1685 Loc.isInvalid()) 1686 return getOrCreateDefaultLocation(Flags).getPointer(); 1687 1688 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1689 1690 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1691 Address LocValue = Address::invalid(); 1692 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1693 if (I != OpenMPLocThreadIDMap.end()) 1694 LocValue = Address(I->second.DebugLoc, Align); 1695 1696 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1697 // GetOpenMPThreadID was called before this routine. 1698 if (!LocValue.isValid()) { 1699 // Generate "ident_t .kmpc_loc.addr;" 1700 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1701 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1702 Elem.second.DebugLoc = AI.getPointer(); 1703 LocValue = AI; 1704 1705 if (!Elem.second.ServiceInsertPt) 1706 setLocThreadIdInsertPt(CGF); 1707 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1708 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1709 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1710 CGF.getTypeSize(IdentQTy)); 1711 } 1712 1713 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1714 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1715 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1716 LValue PSource = 1717 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1718 1719 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1720 if (OMPDebugLoc == nullptr) { 1721 SmallString<128> Buffer2; 1722 llvm::raw_svector_ostream OS2(Buffer2); 1723 // Build debug location 1724 PresumedLoc PLoc = 
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1725 OS2 << ";" << PLoc.getFilename() << ";"; 1726 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1727 OS2 << FD->getQualifiedNameAsString(); 1728 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1729 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1730 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1731 } 1732 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1733 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1734 1735 // Our callers always pass this to a runtime function, so for 1736 // convenience, go ahead and return a naked pointer. 1737 return LocValue.getPointer(); 1738 } 1739 1740 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1741 SourceLocation Loc) { 1742 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1743 1744 llvm::Value *ThreadID = nullptr; 1745 // Check whether we've already cached a load of the thread id in this 1746 // function. 1747 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1748 if (I != OpenMPLocThreadIDMap.end()) { 1749 ThreadID = I->second.ThreadID; 1750 if (ThreadID != nullptr) 1751 return ThreadID; 1752 } 1753 // If exceptions are enabled, do not use parameter to avoid possible crash. 1754 if (auto *OMPRegionInfo = 1755 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1756 if (OMPRegionInfo->getThreadIDVariable()) { 1757 // Check if this an outlined function with thread id passed as argument. 
1758 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1759 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1760 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1761 !CGF.getLangOpts().CXXExceptions || 1762 CGF.Builder.GetInsertBlock() == TopBlock || 1763 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1764 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1765 TopBlock || 1766 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1767 CGF.Builder.GetInsertBlock()) { 1768 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1769 // If value loaded in entry block, cache it and use it everywhere in 1770 // function. 1771 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1772 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1773 Elem.second.ThreadID = ThreadID; 1774 } 1775 return ThreadID; 1776 } 1777 } 1778 } 1779 1780 // This is not an outlined function region - need to call __kmpc_int32 1781 // kmpc_global_thread_num(ident_t *loc). 1782 // Generate thread id value and cache this value for use across the 1783 // function. 
1784 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1785 if (!Elem.second.ServiceInsertPt) 1786 setLocThreadIdInsertPt(CGF); 1787 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1788 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1789 llvm::CallInst *Call = CGF.Builder.CreateCall( 1790 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1791 emitUpdateLocation(CGF, Loc)); 1792 Call->setCallingConv(CGF.getRuntimeCC()); 1793 Elem.second.ThreadID = Call; 1794 return Call; 1795 } 1796 1797 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1798 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1799 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1800 clearLocThreadIdInsertPt(CGF); 1801 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1802 } 1803 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1804 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1805 UDRMap.erase(D); 1806 FunctionUDRMap.erase(CGF.CurFn); 1807 } 1808 auto I = FunctionUDMMap.find(CGF.CurFn); 1809 if (I != FunctionUDMMap.end()) { 1810 for(const auto *D : I->second) 1811 UDMMap.erase(D); 1812 FunctionUDMMap.erase(I); 1813 } 1814 LastprivateConditionalToTypes.erase(CGF.CurFn); 1815 } 1816 1817 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1818 return IdentTy->getPointerTo(); 1819 } 1820 1821 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1822 if (!Kmpc_MicroTy) { 1823 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1824 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1825 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1826 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1827 } 1828 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1829 } 1830 1831 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1832 llvm::FunctionCallee RTLFn = nullptr; 1833 switch (static_cast<OpenMPRTLFunction>(Function)) { 1834 case OMPRTL__kmpc_fork_call: { 1835 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1836 // microtask, ...); 1837 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1838 getKmpc_MicroPointerTy()}; 1839 auto *FnTy = 1840 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1841 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1842 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1843 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1844 llvm::LLVMContext &Ctx = F->getContext(); 1845 llvm::MDBuilder MDB(Ctx); 1846 // Annotate the callback behavior of the __kmpc_fork_call: 1847 // - The callback callee is argument number 2 (microtask). 1848 // - The first two arguments of the callback callee are unknown (-1). 1849 // - All variadic arguments to the __kmpc_fork_call are passed to the 1850 // callback callee. 
1851 F->addMetadata( 1852 llvm::LLVMContext::MD_callback, 1853 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1854 2, {-1, -1}, 1855 /* VarArgsArePassed */ true)})); 1856 } 1857 } 1858 break; 1859 } 1860 case OMPRTL__kmpc_global_thread_num: { 1861 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1862 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1863 auto *FnTy = 1864 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1865 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1866 break; 1867 } 1868 case OMPRTL__kmpc_threadprivate_cached: { 1869 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1870 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1871 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1872 CGM.VoidPtrTy, CGM.SizeTy, 1873 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1874 auto *FnTy = 1875 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1876 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1877 break; 1878 } 1879 case OMPRTL__kmpc_critical: { 1880 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1881 // kmp_critical_name *crit); 1882 llvm::Type *TypeParams[] = { 1883 getIdentTyPointerTy(), CGM.Int32Ty, 1884 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1885 auto *FnTy = 1886 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1887 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1888 break; 1889 } 1890 case OMPRTL__kmpc_critical_with_hint: { 1891 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1892 // kmp_critical_name *crit, uintptr_t hint); 1893 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1894 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1895 CGM.IntPtrTy}; 1896 auto *FnTy = 1897 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1898 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1899 break; 1900 } 1901 case OMPRTL__kmpc_threadprivate_register: { 1902 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1903 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1904 // typedef void *(*kmpc_ctor)(void *); 1905 auto *KmpcCtorTy = 1906 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1907 /*isVarArg*/ false)->getPointerTo(); 1908 // typedef void *(*kmpc_cctor)(void *, void *); 1909 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1910 auto *KmpcCopyCtorTy = 1911 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1912 /*isVarArg*/ false) 1913 ->getPointerTo(); 1914 // typedef void (*kmpc_dtor)(void *); 1915 auto *KmpcDtorTy = 1916 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1917 ->getPointerTo(); 1918 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1919 KmpcCopyCtorTy, KmpcDtorTy}; 1920 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1921 /*isVarArg*/ false); 1922 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1923 break; 1924 } 1925 case OMPRTL__kmpc_end_critical: { 1926 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1927 // kmp_critical_name *crit); 1928 llvm::Type *TypeParams[] = { 1929 getIdentTyPointerTy(), CGM.Int32Ty, 1930 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1931 auto *FnTy = 1932 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1933 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1934 break; 1935 } 1936 case OMPRTL__kmpc_cancel_barrier: { 1937 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1938 // global_tid); 1939 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1940 auto *FnTy = 1941 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1942 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1943 break; 1944 } 1945 case 
OMPRTL__kmpc_barrier: { 1946 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1947 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1948 auto *FnTy = 1949 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1950 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1951 break; 1952 } 1953 case OMPRTL__kmpc_for_static_fini: { 1954 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1955 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1956 auto *FnTy = 1957 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1958 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1959 break; 1960 } 1961 case OMPRTL__kmpc_push_num_threads: { 1962 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1963 // kmp_int32 num_threads) 1964 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1965 CGM.Int32Ty}; 1966 auto *FnTy = 1967 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1968 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1969 break; 1970 } 1971 case OMPRTL__kmpc_serialized_parallel: { 1972 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1973 // global_tid); 1974 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1975 auto *FnTy = 1976 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1977 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1978 break; 1979 } 1980 case OMPRTL__kmpc_end_serialized_parallel: { 1981 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1982 // global_tid); 1983 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1984 auto *FnTy = 1985 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1986 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1987 break; 1988 } 1989 case OMPRTL__kmpc_flush: { 1990 // Build void 
__kmpc_flush(ident_t *loc); 1991 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1992 auto *FnTy = 1993 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1994 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1995 break; 1996 } 1997 case OMPRTL__kmpc_master: { 1998 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1999 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2000 auto *FnTy = 2001 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2002 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 2003 break; 2004 } 2005 case OMPRTL__kmpc_end_master: { 2006 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 2007 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2008 auto *FnTy = 2009 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2010 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 2011 break; 2012 } 2013 case OMPRTL__kmpc_omp_taskyield: { 2014 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 2015 // int end_part); 2016 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2017 auto *FnTy = 2018 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2019 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 2020 break; 2021 } 2022 case OMPRTL__kmpc_single: { 2023 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 2024 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2025 auto *FnTy = 2026 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2027 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 2028 break; 2029 } 2030 case OMPRTL__kmpc_end_single: { 2031 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 2032 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2033 auto *FnTy = 2034 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2035 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 2036 break; 2037 } 2038 case OMPRTL__kmpc_omp_task_alloc: { 2039 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2040 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2041 // kmp_routine_entry_t *task_entry); 2042 assert(KmpRoutineEntryPtrTy != nullptr && 2043 "Type kmp_routine_entry_t must be created."); 2044 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2045 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2046 // Return void * and then cast to particular kmp_task_t type. 2047 auto *FnTy = 2048 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2049 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2050 break; 2051 } 2052 case OMPRTL__kmpc_omp_target_task_alloc: { 2053 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2054 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2055 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2056 assert(KmpRoutineEntryPtrTy != nullptr && 2057 "Type kmp_routine_entry_t must be created."); 2058 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2059 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2060 CGM.Int64Ty}; 2061 // Return void * and then cast to particular kmp_task_t type. 
2062 auto *FnTy = 2063 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2064 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2065 break; 2066 } 2067 case OMPRTL__kmpc_omp_task: { 2068 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2069 // *new_task); 2070 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2071 CGM.VoidPtrTy}; 2072 auto *FnTy = 2073 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2074 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2075 break; 2076 } 2077 case OMPRTL__kmpc_copyprivate: { 2078 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2079 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2080 // kmp_int32 didit); 2081 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2082 auto *CpyFnTy = 2083 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2084 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2085 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2086 CGM.Int32Ty}; 2087 auto *FnTy = 2088 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2089 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2090 break; 2091 } 2092 case OMPRTL__kmpc_reduce: { 2093 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2094 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2095 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2096 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2097 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2098 /*isVarArg=*/false); 2099 llvm::Type *TypeParams[] = { 2100 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2101 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2102 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2103 auto *FnTy = 2104 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2105 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2106 break; 2107 } 2108 case OMPRTL__kmpc_reduce_nowait: { 2109 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2110 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2111 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2112 // *lck); 2113 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2114 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2115 /*isVarArg=*/false); 2116 llvm::Type *TypeParams[] = { 2117 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2118 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2119 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2120 auto *FnTy = 2121 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2122 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2123 break; 2124 } 2125 case OMPRTL__kmpc_end_reduce: { 2126 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2127 // kmp_critical_name *lck); 2128 llvm::Type *TypeParams[] = { 2129 getIdentTyPointerTy(), CGM.Int32Ty, 2130 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2131 auto *FnTy = 2132 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2133 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2134 break; 2135 } 2136 case OMPRTL__kmpc_end_reduce_nowait: { 2137 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2138 // kmp_critical_name *lck); 2139 llvm::Type *TypeParams[] = { 2140 getIdentTyPointerTy(), CGM.Int32Ty, 2141 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2142 auto *FnTy = 2143 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2144 RTLFn = 2145 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2146 break; 2147 } 2148 case OMPRTL__kmpc_omp_task_begin_if0: { 
2149 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2150 // *new_task); 2151 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2152 CGM.VoidPtrTy}; 2153 auto *FnTy = 2154 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2155 RTLFn = 2156 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2157 break; 2158 } 2159 case OMPRTL__kmpc_omp_task_complete_if0: { 2160 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2161 // *new_task); 2162 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2163 CGM.VoidPtrTy}; 2164 auto *FnTy = 2165 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2166 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2167 /*Name=*/"__kmpc_omp_task_complete_if0"); 2168 break; 2169 } 2170 case OMPRTL__kmpc_ordered: { 2171 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2172 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2173 auto *FnTy = 2174 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2175 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2176 break; 2177 } 2178 case OMPRTL__kmpc_end_ordered: { 2179 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2180 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2181 auto *FnTy = 2182 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2183 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2184 break; 2185 } 2186 case OMPRTL__kmpc_omp_taskwait: { 2187 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2188 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2189 auto *FnTy = 2190 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2191 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2192 break; 2193 } 2194 case OMPRTL__kmpc_taskgroup: { 2195 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2196 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2197 auto *FnTy = 2198 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2199 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2200 break; 2201 } 2202 case OMPRTL__kmpc_end_taskgroup: { 2203 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2204 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2205 auto *FnTy = 2206 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2207 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2208 break; 2209 } 2210 case OMPRTL__kmpc_push_proc_bind: { 2211 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2212 // int proc_bind) 2213 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2214 auto *FnTy = 2215 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2216 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2217 break; 2218 } 2219 case OMPRTL__kmpc_omp_task_with_deps: { 2220 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2221 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2222 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2223 llvm::Type *TypeParams[] = { 2224 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2225 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2226 auto *FnTy = 2227 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2228 RTLFn = 2229 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2230 break; 2231 } 2232 case OMPRTL__kmpc_omp_wait_deps: { 2233 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2234 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2235 // kmp_depend_info_t *noalias_dep_list); 2236 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2237 CGM.Int32Ty, CGM.VoidPtrTy, 2238 
CGM.Int32Ty, CGM.VoidPtrTy}; 2239 auto *FnTy = 2240 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2241 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2242 break; 2243 } 2244 case OMPRTL__kmpc_cancellationpoint: { 2245 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2246 // global_tid, kmp_int32 cncl_kind) 2247 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2248 auto *FnTy = 2249 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2250 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2251 break; 2252 } 2253 case OMPRTL__kmpc_cancel: { 2254 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2255 // kmp_int32 cncl_kind) 2256 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2257 auto *FnTy = 2258 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2259 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2260 break; 2261 } 2262 case OMPRTL__kmpc_push_num_teams: { 2263 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2264 // kmp_int32 num_teams, kmp_int32 num_threads) 2265 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2266 CGM.Int32Ty}; 2267 auto *FnTy = 2268 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2269 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2270 break; 2271 } 2272 case OMPRTL__kmpc_fork_teams: { 2273 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2274 // microtask, ...); 2275 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2276 getKmpc_MicroPointerTy()}; 2277 auto *FnTy = 2278 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2279 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2280 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2281 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2282 llvm::LLVMContext &Ctx = F->getContext(); 2283 llvm::MDBuilder MDB(Ctx); 2284 // Annotate the callback behavior of the __kmpc_fork_teams: 2285 // - The callback callee is argument number 2 (microtask). 2286 // - The first two arguments of the callback callee are unknown (-1). 2287 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2288 // callback callee. 2289 F->addMetadata( 2290 llvm::LLVMContext::MD_callback, 2291 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2292 2, {-1, -1}, 2293 /* VarArgsArePassed */ true)})); 2294 } 2295 } 2296 break; 2297 } 2298 case OMPRTL__kmpc_taskloop: { 2299 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2300 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2301 // sched, kmp_uint64 grainsize, void *task_dup); 2302 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2303 CGM.IntTy, 2304 CGM.VoidPtrTy, 2305 CGM.IntTy, 2306 CGM.Int64Ty->getPointerTo(), 2307 CGM.Int64Ty->getPointerTo(), 2308 CGM.Int64Ty, 2309 CGM.IntTy, 2310 CGM.IntTy, 2311 CGM.Int64Ty, 2312 CGM.VoidPtrTy}; 2313 auto *FnTy = 2314 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2315 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2316 break; 2317 } 2318 case OMPRTL__kmpc_doacross_init: { 2319 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2320 // num_dims, struct kmp_dim *dims); 2321 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2322 CGM.Int32Ty, 2323 CGM.Int32Ty, 2324 CGM.VoidPtrTy}; 2325 auto *FnTy = 2326 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2327 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2328 break; 2329 } 2330 case OMPRTL__kmpc_doacross_fini: { 2331 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2332 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2333 auto *FnTy 
= 2334 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2335 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2336 break; 2337 } 2338 case OMPRTL__kmpc_doacross_post: { 2339 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2340 // *vec); 2341 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2342 CGM.Int64Ty->getPointerTo()}; 2343 auto *FnTy = 2344 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2345 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2346 break; 2347 } 2348 case OMPRTL__kmpc_doacross_wait: { 2349 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2350 // *vec); 2351 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2352 CGM.Int64Ty->getPointerTo()}; 2353 auto *FnTy = 2354 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2355 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2356 break; 2357 } 2358 case OMPRTL__kmpc_task_reduction_init: { 2359 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2360 // *data); 2361 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2362 auto *FnTy = 2363 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2364 RTLFn = 2365 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2366 break; 2367 } 2368 case OMPRTL__kmpc_task_reduction_get_th_data: { 2369 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2370 // *d); 2371 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2372 auto *FnTy = 2373 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2374 RTLFn = CGM.CreateRuntimeFunction( 2375 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2376 break; 2377 } 2378 case OMPRTL__kmpc_alloc: { 2379 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2380 // 
al); omp_allocator_handle_t type is void *. 2381 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2382 auto *FnTy = 2383 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2384 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2385 break; 2386 } 2387 case OMPRTL__kmpc_free: { 2388 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2389 // al); omp_allocator_handle_t type is void *. 2390 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2391 auto *FnTy = 2392 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2393 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2394 break; 2395 } 2396 case OMPRTL__kmpc_push_target_tripcount: { 2397 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2398 // size); 2399 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2400 llvm::FunctionType *FnTy = 2401 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2402 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2403 break; 2404 } 2405 case OMPRTL__tgt_target: { 2406 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2407 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2408 // *arg_types); 2409 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2410 CGM.VoidPtrTy, 2411 CGM.Int32Ty, 2412 CGM.VoidPtrPtrTy, 2413 CGM.VoidPtrPtrTy, 2414 CGM.Int64Ty->getPointerTo(), 2415 CGM.Int64Ty->getPointerTo()}; 2416 auto *FnTy = 2417 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2418 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2419 break; 2420 } 2421 case OMPRTL__tgt_target_nowait: { 2422 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2423 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2424 // int64_t *arg_types); 2425 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2426 CGM.VoidPtrTy, 2427 
CGM.Int32Ty, 2428 CGM.VoidPtrPtrTy, 2429 CGM.VoidPtrPtrTy, 2430 CGM.Int64Ty->getPointerTo(), 2431 CGM.Int64Ty->getPointerTo()}; 2432 auto *FnTy = 2433 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2434 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2435 break; 2436 } 2437 case OMPRTL__tgt_target_teams: { 2438 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2439 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2440 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2441 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2442 CGM.VoidPtrTy, 2443 CGM.Int32Ty, 2444 CGM.VoidPtrPtrTy, 2445 CGM.VoidPtrPtrTy, 2446 CGM.Int64Ty->getPointerTo(), 2447 CGM.Int64Ty->getPointerTo(), 2448 CGM.Int32Ty, 2449 CGM.Int32Ty}; 2450 auto *FnTy = 2451 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2452 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2453 break; 2454 } 2455 case OMPRTL__tgt_target_teams_nowait: { 2456 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2457 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2458 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2459 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2460 CGM.VoidPtrTy, 2461 CGM.Int32Ty, 2462 CGM.VoidPtrPtrTy, 2463 CGM.VoidPtrPtrTy, 2464 CGM.Int64Ty->getPointerTo(), 2465 CGM.Int64Ty->getPointerTo(), 2466 CGM.Int32Ty, 2467 CGM.Int32Ty}; 2468 auto *FnTy = 2469 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2470 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2471 break; 2472 } 2473 case OMPRTL__tgt_register_requires: { 2474 // Build void __tgt_register_requires(int64_t flags); 2475 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2476 auto *FnTy = 2477 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2478 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2479 
break; 2480 } 2481 case OMPRTL__tgt_target_data_begin: { 2482 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2483 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2484 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2485 CGM.Int32Ty, 2486 CGM.VoidPtrPtrTy, 2487 CGM.VoidPtrPtrTy, 2488 CGM.Int64Ty->getPointerTo(), 2489 CGM.Int64Ty->getPointerTo()}; 2490 auto *FnTy = 2491 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2492 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2493 break; 2494 } 2495 case OMPRTL__tgt_target_data_begin_nowait: { 2496 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2497 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2498 // *arg_types); 2499 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2500 CGM.Int32Ty, 2501 CGM.VoidPtrPtrTy, 2502 CGM.VoidPtrPtrTy, 2503 CGM.Int64Ty->getPointerTo(), 2504 CGM.Int64Ty->getPointerTo()}; 2505 auto *FnTy = 2506 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2507 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2508 break; 2509 } 2510 case OMPRTL__tgt_target_data_end: { 2511 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2512 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2513 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2514 CGM.Int32Ty, 2515 CGM.VoidPtrPtrTy, 2516 CGM.VoidPtrPtrTy, 2517 CGM.Int64Ty->getPointerTo(), 2518 CGM.Int64Ty->getPointerTo()}; 2519 auto *FnTy = 2520 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2521 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2522 break; 2523 } 2524 case OMPRTL__tgt_target_data_end_nowait: { 2525 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2526 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2527 // *arg_types); 2528 llvm::Type *TypeParams[] = {CGM.Int64Ty, 
2529 CGM.Int32Ty, 2530 CGM.VoidPtrPtrTy, 2531 CGM.VoidPtrPtrTy, 2532 CGM.Int64Ty->getPointerTo(), 2533 CGM.Int64Ty->getPointerTo()}; 2534 auto *FnTy = 2535 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2536 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2537 break; 2538 } 2539 case OMPRTL__tgt_target_data_update: { 2540 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2541 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2542 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2543 CGM.Int32Ty, 2544 CGM.VoidPtrPtrTy, 2545 CGM.VoidPtrPtrTy, 2546 CGM.Int64Ty->getPointerTo(), 2547 CGM.Int64Ty->getPointerTo()}; 2548 auto *FnTy = 2549 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2550 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2551 break; 2552 } 2553 case OMPRTL__tgt_target_data_update_nowait: { 2554 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2555 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2556 // *arg_types); 2557 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2558 CGM.Int32Ty, 2559 CGM.VoidPtrPtrTy, 2560 CGM.VoidPtrPtrTy, 2561 CGM.Int64Ty->getPointerTo(), 2562 CGM.Int64Ty->getPointerTo()}; 2563 auto *FnTy = 2564 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2565 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2566 break; 2567 } 2568 case OMPRTL__tgt_mapper_num_components: { 2569 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2570 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2571 auto *FnTy = 2572 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2573 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2574 break; 2575 } 2576 case OMPRTL__tgt_push_mapper_component: { 2577 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2578 // *base, void *begin, 
int64_t size, int64_t type); 2579 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2580 CGM.Int64Ty, CGM.Int64Ty}; 2581 auto *FnTy = 2582 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2583 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2584 break; 2585 } 2586 } 2587 assert(RTLFn && "Unable to find OpenMP runtime function"); 2588 return RTLFn; 2589 } 2590 2591 llvm::FunctionCallee 2592 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2593 assert((IVSize == 32 || IVSize == 64) && 2594 "IV size is not compatible with the omp runtime"); 2595 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2596 : "__kmpc_for_static_init_4u") 2597 : (IVSigned ? "__kmpc_for_static_init_8" 2598 : "__kmpc_for_static_init_8u"); 2599 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2600 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2601 llvm::Type *TypeParams[] = { 2602 getIdentTyPointerTy(), // loc 2603 CGM.Int32Ty, // tid 2604 CGM.Int32Ty, // schedtype 2605 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2606 PtrTy, // p_lower 2607 PtrTy, // p_upper 2608 PtrTy, // p_stride 2609 ITy, // incr 2610 ITy // chunk 2611 }; 2612 auto *FnTy = 2613 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2614 return CGM.CreateRuntimeFunction(FnTy, Name); 2615 } 2616 2617 llvm::FunctionCallee 2618 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2619 assert((IVSize == 32 || IVSize == 64) && 2620 "IV size is not compatible with the omp runtime"); 2621 StringRef Name = 2622 IVSize == 32 2623 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2624 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2625 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2626 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2627 CGM.Int32Ty, // tid 2628 CGM.Int32Ty, // schedtype 2629 ITy, // lower 2630 ITy, // upper 2631 ITy, // stride 2632 ITy // chunk 2633 }; 2634 auto *FnTy = 2635 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2636 return CGM.CreateRuntimeFunction(FnTy, Name); 2637 } 2638 2639 llvm::FunctionCallee 2640 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2641 assert((IVSize == 32 || IVSize == 64) && 2642 "IV size is not compatible with the omp runtime"); 2643 StringRef Name = 2644 IVSize == 32 2645 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2646 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2647 llvm::Type *TypeParams[] = { 2648 getIdentTyPointerTy(), // loc 2649 CGM.Int32Ty, // tid 2650 }; 2651 auto *FnTy = 2652 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2653 return CGM.CreateRuntimeFunction(FnTy, Name); 2654 } 2655 2656 llvm::FunctionCallee 2657 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2658 assert((IVSize == 32 || IVSize == 64) && 2659 "IV size is not compatible with the omp runtime"); 2660 StringRef Name = 2661 IVSize == 32 2662 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2663 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2664 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2665 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2666 llvm::Type *TypeParams[] = { 2667 getIdentTyPointerTy(), // loc 2668 CGM.Int32Ty, // tid 2669 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2670 PtrTy, // p_lower 2671 PtrTy, // p_upper 2672 PtrTy // p_stride 2673 }; 2674 auto *FnTy = 2675 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2676 return CGM.CreateRuntimeFunction(FnTy, Name); 2677 } 2678 2679 /// Obtain information that uniquely identifies a target entry. This 2680 /// consists of the file and device IDs as well as line number associated with 2681 /// the relevant entry source location. 2682 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2683 unsigned &DeviceID, unsigned &FileID, 2684 unsigned &LineNum) { 2685 SourceManager &SM = C.getSourceManager(); 2686 2687 // The loc should be always valid and have a file ID (the user cannot use 2688 // #pragma directives in macros) 2689 2690 assert(Loc.isValid() && "Source location is expected to be always valid."); 2691 2692 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2693 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2694 2695 llvm::sys::fs::UniqueID ID; 2696 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2697 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2698 << PLoc.getFilename() << EC.message(); 2699 2700 DeviceID = ID.getDevice(); 2701 FileID = ID.getFile(); 2702 LineNum = PLoc.getLine(); 2703 } 2704 2705 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2706 if (CGM.getLangOpts().OpenMPSimd) 2707 return Address::invalid(); 2708 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2709 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2710 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2711 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2712 HasRequiresUnifiedSharedMemory))) { 2713 SmallString<64> PtrName; 2714 { 2715 
llvm::raw_svector_ostream OS(PtrName); 2716 OS << CGM.getMangledName(GlobalDecl(VD)); 2717 if (!VD->isExternallyVisible()) { 2718 unsigned DeviceID, FileID, Line; 2719 getTargetEntryUniqueInfo(CGM.getContext(), 2720 VD->getCanonicalDecl()->getBeginLoc(), 2721 DeviceID, FileID, Line); 2722 OS << llvm::format("_%x", FileID); 2723 } 2724 OS << "_decl_tgt_ref_ptr"; 2725 } 2726 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2727 if (!Ptr) { 2728 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2729 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2730 PtrName); 2731 2732 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2733 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2734 2735 if (!CGM.getLangOpts().OpenMPIsDevice) 2736 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2737 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2738 } 2739 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2740 } 2741 return Address::invalid(); 2742 } 2743 2744 llvm::Constant * 2745 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2746 assert(!CGM.getLangOpts().OpenMPUseTLS || 2747 !CGM.getContext().getTargetInfo().isTLSSupported()); 2748 // Lookup the entry, lazily creating it if necessary. 
2749 std::string Suffix = getName({"cache", ""}); 2750 return getOrCreateInternalVariable( 2751 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2752 } 2753 2754 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2755 const VarDecl *VD, 2756 Address VDAddr, 2757 SourceLocation Loc) { 2758 if (CGM.getLangOpts().OpenMPUseTLS && 2759 CGM.getContext().getTargetInfo().isTLSSupported()) 2760 return VDAddr; 2761 2762 llvm::Type *VarTy = VDAddr.getElementType(); 2763 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2764 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2765 CGM.Int8PtrTy), 2766 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2767 getOrCreateThreadPrivateCache(VD)}; 2768 return Address(CGF.EmitRuntimeCall( 2769 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2770 VDAddr.getAlignment()); 2771 } 2772 2773 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2774 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2775 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2776 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2777 // library. 2778 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2779 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2780 OMPLoc); 2781 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2782 // to register constructor/destructor for variable. 
2783 llvm::Value *Args[] = { 2784 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2785 Ctor, CopyCtor, Dtor}; 2786 CGF.EmitRuntimeCall( 2787 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2788 } 2789 2790 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2791 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2792 bool PerformInit, CodeGenFunction *CGF) { 2793 if (CGM.getLangOpts().OpenMPUseTLS && 2794 CGM.getContext().getTargetInfo().isTLSSupported()) 2795 return nullptr; 2796 2797 VD = VD->getDefinition(CGM.getContext()); 2798 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2799 QualType ASTTy = VD->getType(); 2800 2801 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2802 const Expr *Init = VD->getAnyInitializer(); 2803 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2804 // Generate function that re-emits the declaration's initializer into the 2805 // threadprivate copy of the variable VD 2806 CodeGenFunction CtorCGF(CGM); 2807 FunctionArgList Args; 2808 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2809 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2810 ImplicitParamDecl::Other); 2811 Args.push_back(&Dst); 2812 2813 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2814 CGM.getContext().VoidPtrTy, Args); 2815 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2816 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2817 llvm::Function *Fn = 2818 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2819 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2820 Args, Loc, Loc); 2821 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2822 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2823 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2824 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2825 Arg = CtorCGF.Builder.CreateElementBitCast( 2826 Arg, 
CtorCGF.ConvertTypeForMem(ASTTy)); 2827 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2828 /*IsInitializer=*/true); 2829 ArgVal = CtorCGF.EmitLoadOfScalar( 2830 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2831 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2832 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2833 CtorCGF.FinishFunction(); 2834 Ctor = Fn; 2835 } 2836 if (VD->getType().isDestructedType() != QualType::DK_none) { 2837 // Generate function that emits destructor call for the threadprivate copy 2838 // of the variable VD 2839 CodeGenFunction DtorCGF(CGM); 2840 FunctionArgList Args; 2841 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2842 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2843 ImplicitParamDecl::Other); 2844 Args.push_back(&Dst); 2845 2846 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2847 CGM.getContext().VoidTy, Args); 2848 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2849 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2850 llvm::Function *Fn = 2851 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2852 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2853 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2854 Loc, Loc); 2855 // Create a scope with an artificial location for the body of this function. 2856 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2857 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2858 DtorCGF.GetAddrOfLocalVar(&Dst), 2859 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2860 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2861 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2862 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2863 DtorCGF.FinishFunction(); 2864 Dtor = Fn; 2865 } 2866 // Do not emit init function if it is not required. 
2867 if (!Ctor && !Dtor) 2868 return nullptr; 2869 2870 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2871 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2872 /*isVarArg=*/false) 2873 ->getPointerTo(); 2874 // Copying constructor for the threadprivate variable. 2875 // Must be NULL - reserved by runtime, but currently it requires that this 2876 // parameter is always NULL. Otherwise it fires assertion. 2877 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2878 if (Ctor == nullptr) { 2879 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2880 /*isVarArg=*/false) 2881 ->getPointerTo(); 2882 Ctor = llvm::Constant::getNullValue(CtorTy); 2883 } 2884 if (Dtor == nullptr) { 2885 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2886 /*isVarArg=*/false) 2887 ->getPointerTo(); 2888 Dtor = llvm::Constant::getNullValue(DtorTy); 2889 } 2890 if (!CGF) { 2891 auto *InitFunctionTy = 2892 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2893 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2894 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2895 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2896 CodeGenFunction InitCGF(CGM); 2897 FunctionArgList ArgList; 2898 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2899 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2900 Loc, Loc); 2901 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2902 InitCGF.FinishFunction(); 2903 return InitFunction; 2904 } 2905 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2906 } 2907 return nullptr; 2908 } 2909 2910 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2911 llvm::GlobalVariable *Addr, 2912 bool PerformInit) { 2913 if (CGM.getLangOpts().OMPTargetTriples.empty() && 2914 !CGM.getLangOpts().OpenMPIsDevice) 2915 return false; 2916 
Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2917 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2918 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2919 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2920 HasRequiresUnifiedSharedMemory)) 2921 return CGM.getLangOpts().OpenMPIsDevice; 2922 VD = VD->getDefinition(CGM.getContext()); 2923 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2924 return CGM.getLangOpts().OpenMPIsDevice; 2925 2926 QualType ASTTy = VD->getType(); 2927 2928 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2929 // Produce the unique prefix to identify the new target regions. We use 2930 // the source location of the variable declaration which we know to not 2931 // conflict with any target region. 2932 unsigned DeviceID; 2933 unsigned FileID; 2934 unsigned Line; 2935 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2936 SmallString<128> Buffer, Out; 2937 { 2938 llvm::raw_svector_ostream OS(Buffer); 2939 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2940 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2941 } 2942 2943 const Expr *Init = VD->getAnyInitializer(); 2944 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2945 llvm::Constant *Ctor; 2946 llvm::Constant *ID; 2947 if (CGM.getLangOpts().OpenMPIsDevice) { 2948 // Generate function that re-emits the declaration's initializer into 2949 // the threadprivate copy of the variable VD 2950 CodeGenFunction CtorCGF(CGM); 2951 2952 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2953 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2954 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2955 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2956 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2957 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2958 FunctionArgList(), Loc, Loc); 2959 auto AL = 
ApplyDebugLocation::CreateArtificial(CtorCGF); 2960 CtorCGF.EmitAnyExprToMem(Init, 2961 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2962 Init->getType().getQualifiers(), 2963 /*IsInitializer=*/true); 2964 CtorCGF.FinishFunction(); 2965 Ctor = Fn; 2966 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2967 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2968 } else { 2969 Ctor = new llvm::GlobalVariable( 2970 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2971 llvm::GlobalValue::PrivateLinkage, 2972 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2973 ID = Ctor; 2974 } 2975 2976 // Register the information for the entry associated with the constructor. 2977 Out.clear(); 2978 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2979 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2980 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2981 } 2982 if (VD->getType().isDestructedType() != QualType::DK_none) { 2983 llvm::Constant *Dtor; 2984 llvm::Constant *ID; 2985 if (CGM.getLangOpts().OpenMPIsDevice) { 2986 // Generate function that emits destructor call for the threadprivate 2987 // copy of the variable VD 2988 CodeGenFunction DtorCGF(CGM); 2989 2990 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2991 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2992 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2993 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2994 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2995 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2996 FunctionArgList(), Loc, Loc); 2997 // Create a scope with an artificial location for the body of this 2998 // function. 
2999 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 3000 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 3001 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 3002 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 3003 DtorCGF.FinishFunction(); 3004 Dtor = Fn; 3005 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 3006 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 3007 } else { 3008 Dtor = new llvm::GlobalVariable( 3009 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 3010 llvm::GlobalValue::PrivateLinkage, 3011 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 3012 ID = Dtor; 3013 } 3014 // Register the information for the entry associated with the destructor. 3015 Out.clear(); 3016 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 3017 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 3018 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 3019 } 3020 return CGM.getLangOpts().OpenMPIsDevice; 3021 } 3022 3023 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 3024 QualType VarType, 3025 StringRef Name) { 3026 std::string Suffix = getName({"artificial", ""}); 3027 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 3028 llvm::Value *GAddr = 3029 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 3030 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 3031 CGM.getTarget().isTLSSupported()) { 3032 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 3033 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 3034 } 3035 std::string CacheSuffix = getName({"cache", ""}); 3036 llvm::Value *Args[] = { 3037 emitUpdateLocation(CGF, SourceLocation()), 3038 getThreadID(CGF, SourceLocation()), 3039 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 3040 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 3041 /*isSigned=*/false), 3042 
getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emit code for an OpenMP 'if' clause.
///
/// When \p Cond constant-folds, only the selected generator (\p ThenGen for
/// true, \p ElseGen for false) is invoked and no branch is emitted. Otherwise
/// a conditional branch on \p Cond is emitted with both arms generated into
/// the blocks "omp_if.then" / "omp_if.else", joined at "omp_if.end".
///
/// \param CGF     Function being generated.
/// \param Cond    Condition expression of the clause; dereferenced
///                unconditionally, so it must not be null.
/// \param ThenGen Code generator for the "condition is true" arm.
/// \param ElseGen Code generator for the "condition is false" arm.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): the object returned by CreateEmpty is discarded right away
  // via the (void) cast; if it is a scope guard, its effect presumably ends
  // with this statement and does not cover the code below - confirm intent.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  // NOTE(review): same discarded-temporary pattern as above - confirm intent.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emit the call sequence for a 'parallel' region.
///
/// Without an 'if' clause (\p IfCond is null) only the fork path is emitted:
/// a call to __kmpc_fork_call passing the outlined function and the captured
/// variables. With an 'if' clause, emitIfClause() selects between that fork
/// path and a serialized path which calls \p OutlinedFn directly, bracketed by
/// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel.
///
/// \param CGF          Function being generated; nothing is emitted when it
///                     has no insert point.
/// \param Loc          Source location used for the runtime location argument.
/// \param OutlinedFn   Outlined function holding the region body.
/// \param CapturedVars Values forwarded as trailing arguments to the region.
/// \param IfCond       Condition of the 'if' clause, or null if absent.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  // The location value is computed once here and captured by both generators.
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    // Fixed leading arguments followed by the variadic captured values.
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    // Temporary holding the constant 0, passed by address as second argument.
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    // No 'if' clause: always take the fork path.
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Prefer the thread-id variable of the enclosing OpenMP region, if any.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Otherwise store the thread id into a fresh signed 32-bit temporary and
  // hand out the address of that temporary.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Return the internal variable registered under \p Name in InternalVars,
/// creating it on first request.
///
/// A newly created variable is a non-constant, zero-initialized global with
/// common linkage, not thread-local, placed in \p AddressSpace. For an
/// existing entry an assertion checks that its pointee type equals \p Ty.
///
/// \param Ty           Value type of the variable.
/// \param Name         Name of the variable (also the lookup key).
/// \param AddressSpace Address space used when the variable is created.
/// \return The existing or newly created variable.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Render the Twine into a buffer to obtain a StringRef usable as map key.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  // Inserts a null placeholder if the name is not known yet.
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // The global is named with the key stored in the map entry (Elem.first()).
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Return the lock variable for the critical region named \p CriticalName.
///
/// The variable name is produced by getName() from the parts
/// "gomp_critical_user_" + \p CriticalName and "var"; the variable has type
/// KmpCriticalNameTy and is obtained via getOrCreateInternalVariable().
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits a call to the "enter" callee and Exit() a call to the "exit"
/// callee. In conditional mode the code following Enter() is placed in a block
/// that is only reached when the enter call returned a non-null value; Done()
/// then closes that conditional part.
class CommonActionTy final : public PrePostActionTy {
  /// Runtime function called in Enter().
  llvm::FunctionCallee EnterCallee;
  /// Arguments for EnterCallee. Held as ArrayRef, i.e. not copied here.
  ArrayRef<llvm::Value *> EnterArgs;
  /// Runtime function called in Exit().
  llvm::FunctionCallee ExitCallee;
  /// Arguments for ExitCallee. Held as ArrayRef, i.e. not copied here.
  ArrayRef<llvm::Value *> ExitArgs;
  /// Whether the region is guarded by the result of the enter call.
  bool Conditional;
  /// Continuation block; only assigned by Enter() in conditional mode.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emit the enter call and, in conditional mode, branch on its result.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Branch to and emit the continuation block.
  /// NOTE(review): ContBlock is only set by Enter() when Conditional is true;
  /// callers in this file invoke Done() only for conditional actions -
  /// confirm no other caller relies on it.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emit the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region: the body produced by \p CriticalOpGen is wrapped
/// in an enter call (__kmpc_critical, or __kmpc_critical_with_hint when
/// \p Hint is given) and an exit call (__kmpc_end_critical).
///
/// \param CriticalName Name selecting the lock variable of the region.
/// \param Hint         Optional hint expression; when present its value is
///                     cast (unsigned) to CGM.IntPtrTy and appended to the
///                     enter arguments.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes the same arguments as the exit call plus the
  // optional hint.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: the body produced by \p MasterOpGen and the
/// __kmpc_end_master call are only executed when __kmpc_master returned a
/// non-null value.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional part opened by the action.
  Action.Done(CGF);
}

/// Emit a call to __kmpc_omp_taskyield(loc, thread_id, 0) and, when inside an
/// OpenMP region, let that region emit its untied-task switch.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region: the body produced by \p TaskgroupOpGen is
/// wrapped in calls to __kmpc_taskgroup and __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// \param Array Address of the pointer array.
/// \param Index Position of the wanted pointer within \p Array.
/// \param Var   Declaration supplying alignment and element type of the
///              returned address.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the loaded pointer to the memory type of the variable.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Create the helper function `void copy_func(void *LHSArg, void *RHSArg)`
/// used for 'copyprivate'.
///
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers;
/// for every index I the generated function copies from the element of the
/// second array to the element of the first array using \p AssignmentOps[I].
///
/// \param ArgsType        Type both `void *` arguments are cast to.
/// \param CopyprivateVars Variables of the clause; supply the copied type.
/// \param DestExprs       References to the destination helper variables.
/// \param SrcExprs        References to the source helper variables.
/// \param AssignmentOps   Per-variable assignment expressions; their count
///                        determines the number of copies emitted.
/// \return The created function, with internal linkage.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The helper body is generated with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copied type is taken from the original copyprivate variable.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region, optionally followed by the 'copyprivate' broadcast.
///
/// The body produced by \p SingleOpGen and the __kmpc_end_single call are only
/// executed when __kmpc_single returned a non-null value. When
/// \p CopyprivateVars is not empty, a flag records whether this thread ran the
/// body, the addresses of the variables are collected in an array, and
/// __kmpc_copyprivate is called with that array, a generated copy function and
/// the flag.
///
/// \p CopyprivateVars, \p SrcExprs, \p DstExprs and \p AssignmentOps must all
/// have the same size (asserted).
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to broadcast; its validity is
  // used below as the "copyprivate present" marker.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), i.e. still inside the conditional part.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): the arguments are forwarded positionally, so SrcExprs is
    // bound to the callee's DestExprs parameter and DstExprs to its SrcExprs
    // parameter - confirm the parameter naming against the declaration of
    // emitSingleRegion.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// Emit an 'ordered' region.
///
/// When \p IsThreads is true the body produced by \p OrderedOpGen is wrapped
/// in calls to __kmpc_ordered / __kmpc_end_ordered; otherwise the body is
/// emitted inline without any runtime calls.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Return the location flags describing the barrier that belongs to directive
/// \p Kind: a dedicated implicit-barrier flag for 'for', 'sections' and
/// 'single', the explicit-barrier flag for 'barrier', and the generic
/// implicit-barrier flag for everything else.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Choose a default schedule for loop directive \p S.
///
/// Only acts when \p S has an 'ordered' clause with a non-zero loop count: in
/// that case \p ScheduleKind is set to static and \p ChunkExpr to a 32-bit
/// unsigned integer literal 1. Otherwise both outputs are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier for directive \p Kind.
///
/// When an OpenMPIRBuilder is available the barrier is created through it and
/// nothing else is done. Otherwise __kmpc_barrier is called, unless the
/// enclosing OpenMP region has a 'cancel' and \p ForceSimpleCall is false: then
/// __kmpc_cancel_barrier is called instead and, if \p EmitChecks is set, a
/// non-null result branches through cleanups to the cancel destination of the
/// enclosing directive.
///
/// \param EmitChecks      Emit the check of the cancel-barrier result.
/// \param ForceSimpleCall Always use the plain barrier call.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    // Note: this path runs before the insert-point check below.
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
///
/// \param Chunked Whether a chunk size was specified; only distinguishes the
///                result for the static schedule, and must be false for the
///                unknown schedule (asserted).
/// \param Ordered Selects the "ord" instead of the "sch" variant.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // An unspecified schedule is mapped like non-chunked static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
/// \p ScheduleKind is not inspected; the result depends on \p Chunked only.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// Return true if the (unordered) loop schedule maps to non-chunked static.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// Return true if the distribute schedule maps to non-chunked static.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// Return true if the (unordered) loop schedule maps to chunked static.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// Return true if the distribute schedule maps to chunked static.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// Return true if the loop schedule, evaluated as non-chunked and unordered,
/// maps to anything other than static.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Combine the runtime schedule with the schedule modifiers.
///
/// \p M1 is processed first, then \p M2, so a monotonic/nonmonotonic modifier
/// given in \p M2 overrides one given in \p M1. A 'simd' modifier turns a
/// chunked static schedule into the balanced chunked variant. \p Schedule is
/// a by-value parameter, so that adjustment is only visible in the result.
///
/// \return \p Schedule (possibly adjusted) OR-ed with the modifier bits.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

/// Emit the dispatch-init runtime call that starts a dynamically scheduled
/// (or ordered) loop.
///
/// \param ScheduleKind   Schedule kind and modifiers of the loop.
/// \param IVSize         Bit width of the iteration variable; selects the
///                       runtime entry point and the width of the constants.
/// \param IVSigned       Signedness of the iteration variable.
/// \param Ordered        Whether the loop is ordered; the static schedules are
///                       only accepted here in that case (asserted).
/// \param DispatchValues Lower/upper bound and optional chunk; a missing chunk
///                       defaults to 1.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Emit the static-init runtime call shared by worksharing and distribute
/// loops.
///
/// \param UpdateLocation        Location argument of the call.
/// \param ThreadId              Thread id argument of the call.
/// \param ForStaticInitFunction Runtime function to call.
/// \param Schedule              Must be one of the static schedules
///                              (asserted); chunked variants require
///                              Values.Chunk and vice versa (asserted).
/// \param M1, M2                Schedule modifiers folded into the schedule.
/// \param Values                Addresses of the loop control variables and
///                              the optional chunk; must not be ordered
///                              (asserted).
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init call for a worksharing directive.
///
/// \param DKind        Directive kind; must be a worksharing directive
///                     (asserted). Loop directives get the "work loop"
///                     location flag, all others the "work sections" flag.
/// \param ScheduleKind Schedule kind and modifiers.
/// \param Values       Loop control values forwarded to the runtime call.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // DL is kept alive until the end of the function so that it covers the
  // call emitted below.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init call for a 'distribute' loop. Uses the "work
/// distribute" location flag and no schedule modifiers.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the call that finishes a statically scheduled construct. The location
/// flag is chosen from \p DKind: distribute, loop, or sections (in that order
/// of precedence).
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// Emit the call that marks the end of an iteration of an ordered loop; the
/// runtime function is selected by \p IVSize and \p IVSigned.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit the dispatch-next runtime call for a dynamically scheduled loop.
///
/// \param IL, LB, UB, ST Addresses passed to the runtime for the last-iteration
///                       flag, lower bound, upper bound and stride.
/// \return The result of the call converted from a signed 32-bit integer to
///         bool.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit a call to __kmpc_push_num_threads for a 'num_threads' clause;
/// \p NumThreads is cast (signed) to a 32-bit integer first.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// Emit a call to __kmpc_push_proc_bind for a 'proc_bind' clause.
/// \p ProcBind must not be OMP_PROC_BIND_unknown (asserted); its numeric value
/// is passed to the runtime.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// Emit a 'flush'. With an OpenMPIRBuilder the flush is created through it;
/// otherwise __kmpc_flush(loc) is called. The expression list (unnamed
/// parameter) and \p AO are not referenced in this body.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
/// The enumerators carry no explicit values, so their declaration order
/// defines the index values.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// Return true if neither target region entries nor device global variable
/// entries are recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
///
/// Stores a placeholder entry (order \p Order, null address and ID) under the
/// key (\p DeviceID, \p FileID, \p ParentName, \p LineNum) and increments the
/// entry counter. Only valid during device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register address, ID and flags of a target region entry.
///
/// For device code the entry must already exist and be unregistered;
/// otherwise an error diagnostic is reported and nothing is changed. For host
/// code a new entry is created with the current entry count as its order, and
/// the count is incremented.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true if an entry exists for the given key and has not been
/// registered yet, i.e. has neither an address nor an ID. A missing entry and
/// an already registered entry both yield false.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  // Walk the nested maps level by level without inserting anything.
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
3988 for (const auto &D : OffloadEntriesTargetRegion) 3989 for (const auto &F : D.second) 3990 for (const auto &P : F.second) 3991 for (const auto &L : P.second) 3992 Action(D.first, F.first, P.first(), L.first, L.second); 3993 } 3994 3995 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3996 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3997 OMPTargetGlobalVarEntryKind Flags, 3998 unsigned Order) { 3999 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 4000 "only required for the device " 4001 "code generation."); 4002 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 4003 ++OffloadingEntriesNum; 4004 } 4005 4006 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4007 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 4008 CharUnits VarSize, 4009 OMPTargetGlobalVarEntryKind Flags, 4010 llvm::GlobalValue::LinkageTypes Linkage) { 4011 if (CGM.getLangOpts().OpenMPIsDevice) { 4012 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 4013 assert(Entry.isValid() && Entry.getFlags() == Flags && 4014 "Entry not initialized!"); 4015 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 4016 "Resetting with the new address."); 4017 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 4018 if (Entry.getVarSize().isZero()) { 4019 Entry.setVarSize(VarSize); 4020 Entry.setLinkage(Linkage); 4021 } 4022 return; 4023 } 4024 Entry.setVarSize(VarSize); 4025 Entry.setLinkage(Linkage); 4026 Entry.setAddress(Addr); 4027 } else { 4028 if (hasDeviceGlobalVarEntryInfo(VarName)) { 4029 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 4030 assert(Entry.isValid() && Entry.getFlags() == Flags && 4031 "Entry not initialized!"); 4032 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 4033 "Resetting with the new address."); 4034 if (Entry.getVarSize().isZero()) { 4035 Entry.setVarSize(VarSize); 4036 Entry.setLinkage(Linkage); 4037 } 4038 return; 4039 } 4040 
OffloadEntriesDeviceGlobalVar.try_emplace( 4041 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 4042 ++OffloadingEntriesNum; 4043 } 4044 } 4045 4046 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4047 actOnDeviceGlobalVarEntriesInfo( 4048 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 4049 // Scan all target region entries and perform the provided action. 4050 for (const auto &E : OffloadEntriesDeviceGlobalVar) 4051 Action(E.getKey(), E.getValue()); 4052 } 4053 4054 void CGOpenMPRuntime::createOffloadEntry( 4055 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4056 llvm::GlobalValue::LinkageTypes Linkage) { 4057 StringRef Name = Addr->getName(); 4058 llvm::Module &M = CGM.getModule(); 4059 llvm::LLVMContext &C = M.getContext(); 4060 4061 // Create constant string with the name. 4062 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4063 4064 std::string StringName = getName({"omp_offloading", "entry_name"}); 4065 auto *Str = new llvm::GlobalVariable( 4066 M, StrPtrInit->getType(), /*isConstant=*/true, 4067 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4068 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4069 4070 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4071 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4072 llvm::ConstantInt::get(CGM.SizeTy, Size), 4073 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4074 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4075 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4076 llvm::GlobalVariable *Entry = createGlobalStruct( 4077 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4078 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4079 4080 // The entry has to be created in the section the linker expects it to be. 
4081 Entry->setSection("omp_offloading_entries"); 4082 } 4083 4084 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4085 // Emit the offloading entries and metadata so that the device codegen side 4086 // can easily figure out what to emit. The produced metadata looks like 4087 // this: 4088 // 4089 // !omp_offload.info = !{!1, ...} 4090 // 4091 // Right now we only generate metadata for function that contain target 4092 // regions. 4093 4094 // If we are in simd mode or there are no entries, we don't need to do 4095 // anything. 4096 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 4097 return; 4098 4099 llvm::Module &M = CGM.getModule(); 4100 llvm::LLVMContext &C = M.getContext(); 4101 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 4102 SourceLocation, StringRef>, 4103 16> 4104 OrderedEntries(OffloadEntriesInfoManager.size()); 4105 llvm::SmallVector<StringRef, 16> ParentFunctions( 4106 OffloadEntriesInfoManager.size()); 4107 4108 // Auxiliary methods to create metadata values and strings. 4109 auto &&GetMDInt = [this](unsigned V) { 4110 return llvm::ConstantAsMetadata::get( 4111 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4112 }; 4113 4114 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4115 4116 // Create the offloading info metadata node. 4117 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4118 4119 // Create function that emits metadata for each target region entry; 4120 auto &&TargetRegionMetadataEmitter = 4121 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 4122 &GetMDString]( 4123 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4124 unsigned Line, 4125 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4126 // Generate metadata for target regions. Each entry of this metadata 4127 // contains: 4128 // - Entry 0 -> Kind of this type of metadata (0). 
4129 // - Entry 1 -> Device ID of the file where the entry was identified. 4130 // - Entry 2 -> File ID of the file where the entry was identified. 4131 // - Entry 3 -> Mangled name of the function where the entry was 4132 // identified. 4133 // - Entry 4 -> Line in the file where the entry was identified. 4134 // - Entry 5 -> Order the entry was created. 4135 // The first element of the metadata node is the kind. 4136 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4137 GetMDInt(FileID), GetMDString(ParentName), 4138 GetMDInt(Line), GetMDInt(E.getOrder())}; 4139 4140 SourceLocation Loc; 4141 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 4142 E = CGM.getContext().getSourceManager().fileinfo_end(); 4143 I != E; ++I) { 4144 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 4145 I->getFirst()->getUniqueID().getFile() == FileID) { 4146 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 4147 I->getFirst(), Line, 1); 4148 break; 4149 } 4150 } 4151 // Save this entry in the right position of the ordered entries array. 4152 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 4153 ParentFunctions[E.getOrder()] = ParentName; 4154 4155 // Add metadata to the named metadata node. 4156 MD->addOperand(llvm::MDNode::get(C, Ops)); 4157 }; 4158 4159 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4160 TargetRegionMetadataEmitter); 4161 4162 // Create function that emits metadata for each device global variable entry; 4163 auto &&DeviceGlobalVarMetadataEmitter = 4164 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4165 MD](StringRef MangledName, 4166 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4167 &E) { 4168 // Generate metadata for global variables. Each entry of this metadata 4169 // contains: 4170 // - Entry 0 -> Kind of this type of metadata (1). 4171 // - Entry 1 -> Mangled name of the variable. 4172 // - Entry 2 -> Declare target kind. 
4173 // - Entry 3 -> Order the entry was created. 4174 // The first element of the metadata node is the kind. 4175 llvm::Metadata *Ops[] = { 4176 GetMDInt(E.getKind()), GetMDString(MangledName), 4177 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4178 4179 // Save this entry in the right position of the ordered entries array. 4180 OrderedEntries[E.getOrder()] = 4181 std::make_tuple(&E, SourceLocation(), MangledName); 4182 4183 // Add metadata to the named metadata node. 4184 MD->addOperand(llvm::MDNode::get(C, Ops)); 4185 }; 4186 4187 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4188 DeviceGlobalVarMetadataEmitter); 4189 4190 for (const auto &E : OrderedEntries) { 4191 assert(std::get<0>(E) && "All ordered entries must exist!"); 4192 if (const auto *CE = 4193 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4194 std::get<0>(E))) { 4195 if (!CE->getID() || !CE->getAddress()) { 4196 // Do not blame the entry if the parent funtion is not emitted. 4197 StringRef FnName = ParentFunctions[CE->getOrder()]; 4198 if (!CGM.GetGlobalValue(FnName)) 4199 continue; 4200 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4201 DiagnosticsEngine::Error, 4202 "Offloading entry for target region in %0 is incorrect: either the " 4203 "address or the ID is invalid."); 4204 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 4205 continue; 4206 } 4207 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4208 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4209 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 4210 OffloadEntryInfoDeviceGlobalVar>( 4211 std::get<0>(E))) { 4212 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4213 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4214 CE->getFlags()); 4215 switch (Flags) { 4216 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4217 if (CGM.getLangOpts().OpenMPIsDevice && 4218 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4219 continue; 4220 if (!CE->getAddress()) { 4221 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4222 DiagnosticsEngine::Error, "Offloading entry for declare target " 4223 "variable %0 is incorrect: the " 4224 "address is invalid."); 4225 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 4226 continue; 4227 } 4228 // The vaiable has no definition - no need to add the entry. 4229 if (CE->getVarSize().isZero()) 4230 continue; 4231 break; 4232 } 4233 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4234 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4235 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4236 "Declaret target link address is set."); 4237 if (CGM.getLangOpts().OpenMPIsDevice) 4238 continue; 4239 if (!CE->getAddress()) { 4240 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4241 DiagnosticsEngine::Error, 4242 "Offloading entry for declare target variable is incorrect: the " 4243 "address is invalid."); 4244 CGM.getDiags().Report(DiagID); 4245 continue; 4246 } 4247 break; 4248 } 4249 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4250 CE->getVarSize().getQuantity(), Flags, 4251 CE->getLinkage()); 4252 } else { 4253 llvm_unreachable("Unsupported entry kind."); 4254 } 4255 } 4256 } 4257 4258 /// Loads all the offload entries information from the host IR 4259 /// metadata. 4260 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4261 // If we are in target mode, load the metadata from the host IR. This code has 4262 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4263 4264 if (!CGM.getLangOpts().OpenMPIsDevice) 4265 return; 4266 4267 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4268 return; 4269 4270 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4271 if (auto EC = Buf.getError()) { 4272 CGM.getDiags().Report(diag::err_cannot_open_file) 4273 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4274 return; 4275 } 4276 4277 llvm::LLVMContext C; 4278 auto ME = expectedToErrorOrAndEmitErrors( 4279 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4280 4281 if (auto EC = ME.getError()) { 4282 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4283 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4284 CGM.getDiags().Report(DiagID) 4285 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4286 return; 4287 } 4288 4289 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4290 if (!MD) 4291 return; 4292 4293 for (llvm::MDNode *MN : MD->operands()) { 4294 auto &&GetMDInt = [MN](unsigned Idx) { 4295 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4296 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4297 }; 4298 4299 auto &&GetMDString = [MN](unsigned Idx) { 4300 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4301 return V->getString(); 4302 }; 4303 4304 switch (GetMDInt(0)) { 4305 default: 4306 llvm_unreachable("Unexpected metadata!"); 4307 break; 4308 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4309 OffloadingEntryInfoTargetRegion: 4310 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4311 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4312 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4313 /*Order=*/GetMDInt(5)); 4314 break; 4315 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4316 OffloadingEntryInfoDeviceGlobalVar: 4317 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4318 /*MangledName=*/GetMDString(1), 4319 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4320 /*Flags=*/GetMDInt(2)), 4321 /*Order=*/GetMDInt(3)); 4322 break; 4323 } 4324 } 4325 } 4326 4327 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4328 if (!KmpRoutineEntryPtrTy) { 4329 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4330 ASTContext &C = CGM.getContext(); 4331 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4332 FunctionProtoType::ExtProtoInfo EPI; 4333 KmpRoutineEntryPtrQTy = C.getPointerType( 4334 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4335 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4336 } 4337 } 4338 4339 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4340 // Make sure the type of the entry is already created. This is the type we 4341 // have to create: 4342 // struct __tgt_offload_entry{ 4343 // void *addr; // Pointer to the offload entry info. 4344 // // (function or global) 4345 // char *name; // Name of the function or global. 4346 // size_t size; // Size of the entry info (0 if it a function). 4347 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4348 // int32_t reserved; // Reserved, to use by the runtime library. 
4349 // }; 4350 if (TgtOffloadEntryQTy.isNull()) { 4351 ASTContext &C = CGM.getContext(); 4352 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4353 RD->startDefinition(); 4354 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4355 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4356 addFieldToRecordDecl(C, RD, C.getSizeType()); 4357 addFieldToRecordDecl( 4358 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4359 addFieldToRecordDecl( 4360 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4361 RD->completeDefinition(); 4362 RD->addAttr(PackedAttr::CreateImplicit(C)); 4363 TgtOffloadEntryQTy = C.getRecordType(RD); 4364 } 4365 return TgtOffloadEntryQTy; 4366 } 4367 4368 namespace { 4369 struct PrivateHelpersTy { 4370 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4371 const VarDecl *PrivateElemInit) 4372 : Original(Original), PrivateCopy(PrivateCopy), 4373 PrivateElemInit(PrivateElemInit) {} 4374 const VarDecl *Original; 4375 const VarDecl *PrivateCopy; 4376 const VarDecl *PrivateElemInit; 4377 }; 4378 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4379 } // anonymous namespace 4380 4381 static RecordDecl * 4382 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4383 if (!Privates.empty()) { 4384 ASTContext &C = CGM.getContext(); 4385 // Build struct .kmp_privates_t. 
{ 4386 // /* private vars */ 4387 // }; 4388 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4389 RD->startDefinition(); 4390 for (const auto &Pair : Privates) { 4391 const VarDecl *VD = Pair.second.Original; 4392 QualType Type = VD->getType().getNonReferenceType(); 4393 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4394 if (VD->hasAttrs()) { 4395 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4396 E(VD->getAttrs().end()); 4397 I != E; ++I) 4398 FD->addAttr(*I); 4399 } 4400 } 4401 RD->completeDefinition(); 4402 return RD; 4403 } 4404 return nullptr; 4405 } 4406 4407 static RecordDecl * 4408 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4409 QualType KmpInt32Ty, 4410 QualType KmpRoutineEntryPointerQTy) { 4411 ASTContext &C = CGM.getContext(); 4412 // Build struct kmp_task_t { 4413 // void * shareds; 4414 // kmp_routine_entry_t routine; 4415 // kmp_int32 part_id; 4416 // kmp_cmplrdata_t data1; 4417 // kmp_cmplrdata_t data2; 4418 // For taskloops additional fields: 4419 // kmp_uint64 lb; 4420 // kmp_uint64 ub; 4421 // kmp_int64 st; 4422 // kmp_int32 liter; 4423 // void * reductions; 4424 // }; 4425 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4426 UD->startDefinition(); 4427 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4428 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4429 UD->completeDefinition(); 4430 QualType KmpCmplrdataTy = C.getRecordType(UD); 4431 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4432 RD->startDefinition(); 4433 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4434 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4435 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4436 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4437 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4438 if (isOpenMPTaskLoopDirective(Kind)) { 4439 QualType KmpUInt64Ty = 4440 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4441 QualType KmpInt64Ty = 4442 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4443 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4444 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4445 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4446 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4447 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4448 } 4449 RD->completeDefinition(); 4450 return RD; 4451 } 4452 4453 static RecordDecl * 4454 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4455 ArrayRef<PrivateDataTy> Privates) { 4456 ASTContext &C = CGM.getContext(); 4457 // Build struct kmp_task_t_with_privates { 4458 // kmp_task_t task_data; 4459 // .kmp_privates_t. privates; 4460 // }; 4461 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4462 RD->startDefinition(); 4463 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4464 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4465 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4466 RD->completeDefinition(); 4467 return RD; 4468 } 4469 4470 /// Emit a proxy function which accepts kmp_task_t as the second 4471 /// argument. 
4472 /// \code 4473 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4474 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4475 /// For taskloops: 4476 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4477 /// tt->reductions, tt->shareds); 4478 /// return 0; 4479 /// } 4480 /// \endcode 4481 static llvm::Function * 4482 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4483 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4484 QualType KmpTaskTWithPrivatesPtrQTy, 4485 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4486 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4487 llvm::Value *TaskPrivatesMap) { 4488 ASTContext &C = CGM.getContext(); 4489 FunctionArgList Args; 4490 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4491 ImplicitParamDecl::Other); 4492 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4493 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4494 ImplicitParamDecl::Other); 4495 Args.push_back(&GtidArg); 4496 Args.push_back(&TaskTypeArg); 4497 const auto &TaskEntryFnInfo = 4498 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4499 llvm::FunctionType *TaskEntryTy = 4500 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4501 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4502 auto *TaskEntry = llvm::Function::Create( 4503 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4504 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4505 TaskEntry->setDoesNotRecurse(); 4506 CodeGenFunction CGF(CGM); 4507 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4508 Loc, Loc); 4509 4510 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4511 // tt, 4512 // For taskloops: 4513 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4514 // 
tt->task_data.shareds); 4515 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4516 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4517 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4518 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4519 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4520 const auto *KmpTaskTWithPrivatesQTyRD = 4521 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4522 LValue Base = 4523 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4524 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4525 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4526 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4527 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 4528 4529 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4530 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4531 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4532 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4533 CGF.ConvertTypeForMem(SharedsPtrTy)); 4534 4535 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4536 llvm::Value *PrivatesParam; 4537 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4538 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4539 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4540 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 4541 } else { 4542 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4543 } 4544 4545 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4546 TaskPrivatesMap, 4547 CGF.Builder 4548 .CreatePointerBitCastOrAddrSpaceCast( 4549 TDBase.getAddress(CGF), CGF.VoidPtrTy) 4550 .getPointer()}; 4551 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4552 std::end(CommonArgs)); 4553 if (isOpenMPTaskLoopDirective(Kind)) { 4554 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4555 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4556 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4557 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4558 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4559 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4560 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4561 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4562 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4563 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4564 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4565 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4566 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4567 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4568 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4569 CallArgs.push_back(LBParam); 4570 CallArgs.push_back(UBParam); 4571 CallArgs.push_back(StParam); 4572 CallArgs.push_back(LIParam); 4573 CallArgs.push_back(RParam); 4574 } 4575 CallArgs.push_back(SharedsParam); 4576 4577 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4578 CallArgs); 4579 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4580 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4581 CGF.FinishFunction(); 4582 return TaskEntry; 4583 } 4584 4585 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4586 SourceLocation Loc, 4587 QualType KmpInt32Ty, 4588 QualType KmpTaskTWithPrivatesPtrQTy, 4589 QualType KmpTaskTWithPrivatesQTy) { 4590 ASTContext &C = CGM.getContext(); 4591 FunctionArgList Args; 4592 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4593 ImplicitParamDecl::Other); 4594 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4595 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4596 
ImplicitParamDecl::Other); 4597 Args.push_back(&GtidArg); 4598 Args.push_back(&TaskTypeArg); 4599 const auto &DestructorFnInfo = 4600 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4601 llvm::FunctionType *DestructorFnTy = 4602 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4603 std::string Name = 4604 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4605 auto *DestructorFn = 4606 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4607 Name, &CGM.getModule()); 4608 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4609 DestructorFnInfo); 4610 DestructorFn->setDoesNotRecurse(); 4611 CodeGenFunction CGF(CGM); 4612 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4613 Args, Loc, Loc); 4614 4615 LValue Base = CGF.EmitLoadOfPointerLValue( 4616 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4617 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4618 const auto *KmpTaskTWithPrivatesQTyRD = 4619 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4620 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4621 Base = CGF.EmitLValueForField(Base, *FI); 4622 for (const auto *Field : 4623 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4624 if (QualType::DestructionKind DtorKind = 4625 Field->getType().isDestructedType()) { 4626 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4627 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 4628 } 4629 } 4630 CGF.FinishFunction(); 4631 return DestructorFn; 4632 } 4633 4634 /// Emit a privates mapping function for correct handling of private and 4635 /// firstprivate variables. 4636 /// \code 4637 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4638 /// **noalias priv1,..., <tyn> **noalias privn) { 4639 /// *priv1 = &.privates.priv1; 4640 /// ...; 4641 /// *privn = &.privates.privn; 4642 /// } 4643 /// \endcode 4644 static llvm::Value * 4645 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4646 ArrayRef<const Expr *> PrivateVars, 4647 ArrayRef<const Expr *> FirstprivateVars, 4648 ArrayRef<const Expr *> LastprivateVars, 4649 QualType PrivatesQTy, 4650 ArrayRef<PrivateDataTy> Privates) { 4651 ASTContext &C = CGM.getContext(); 4652 FunctionArgList Args; 4653 ImplicitParamDecl TaskPrivatesArg( 4654 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4655 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4656 ImplicitParamDecl::Other); 4657 Args.push_back(&TaskPrivatesArg); 4658 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4659 unsigned Counter = 1; 4660 for (const Expr *E : PrivateVars) { 4661 Args.push_back(ImplicitParamDecl::Create( 4662 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4663 C.getPointerType(C.getPointerType(E->getType())) 4664 .withConst() 4665 .withRestrict(), 4666 ImplicitParamDecl::Other)); 4667 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4668 PrivateVarsPos[VD] = Counter; 4669 ++Counter; 4670 } 4671 for (const Expr *E : FirstprivateVars) { 4672 Args.push_back(ImplicitParamDecl::Create( 4673 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4674 C.getPointerType(C.getPointerType(E->getType())) 4675 .withConst() 4676 .withRestrict(), 4677 ImplicitParamDecl::Other)); 4678 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4679 PrivateVarsPos[VD] = Counter; 4680 ++Counter; 4681 } 4682 for (const Expr *E : LastprivateVars) { 4683 Args.push_back(ImplicitParamDecl::Create( 4684 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4685 C.getPointerType(C.getPointerType(E->getType())) 4686 .withConst() 4687 .withRestrict(), 4688 ImplicitParamDecl::Other)); 4689 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4690 
PrivateVarsPos[VD] = Counter; 4691 ++Counter; 4692 } 4693 const auto &TaskPrivatesMapFnInfo = 4694 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4695 llvm::FunctionType *TaskPrivatesMapTy = 4696 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4697 std::string Name = 4698 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4699 auto *TaskPrivatesMap = llvm::Function::Create( 4700 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4701 &CGM.getModule()); 4702 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4703 TaskPrivatesMapFnInfo); 4704 if (CGM.getLangOpts().Optimize) { 4705 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4706 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4707 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4708 } 4709 CodeGenFunction CGF(CGM); 4710 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4711 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4712 4713 // *privi = &.privates.privi; 4714 LValue Base = CGF.EmitLoadOfPointerLValue( 4715 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4716 TaskPrivatesArg.getType()->castAs<PointerType>()); 4717 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4718 Counter = 0; 4719 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4720 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4721 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4722 LValue RefLVal = 4723 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4724 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4725 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 4726 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 4727 ++Counter; 4728 } 4729 CGF.FinishFunction(); 4730 return TaskPrivatesMap; 4731 } 4732 4733 /// Emit initialization for private variables in task-based directives. 
/// \param KmpTaskSharedsPtr Address of the shareds block used as the source
/// for firstprivate copies; callers may pass an invalid address.
/// \param TDBase LValue of the record described by
/// \p KmpTaskTWithPrivatesQTyRD; its second field is the privates record.
/// \param Privates Private copies, visited in step with the fields of the
/// privates record.
/// \param ForDup If true, only copies initialized by a non-trivial
/// constructor call are (re)initialized.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The second field of the task-with-privates record is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  // Pick the captured statement that belongs to the task/taskloop region; its
  // capture info is used below to find the shared field of each variable.
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI walks the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Skip copies without an initializer. When emitting for the dup function,
    // also skip everything except non-trivial constructor calls.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // PrivateElemInit is only set for firstprivate entries, which are
      // initialized from the original (shared) variable.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: read the source directly from the local variable.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Captured: read the source from the shareds record, re-wrapping
          // the field address with the declared alignment of the original
          // variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  // Make the helper element variable refer to the current
                  // source element while Init is emitted.
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: map the helper variable to the shared
          // original and emit the initializer into the private copy.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No source element: emit the copy's own initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance to the next privates field even when nothing was emitted, so
    // fields and Privates entries stay in step.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
4841 static bool checkInitIsRequired(CodeGenFunction &CGF, 4842 ArrayRef<PrivateDataTy> Privates) { 4843 bool InitRequired = false; 4844 for (const PrivateDataTy &Pair : Privates) { 4845 const VarDecl *VD = Pair.second.PrivateCopy; 4846 const Expr *Init = VD->getAnyInitializer(); 4847 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4848 !CGF.isTrivialInitializer(Init)); 4849 if (InitRequired) 4850 break; 4851 } 4852 return InitRequired; 4853 } 4854 4855 4856 /// Emit task_dup function (for initialization of 4857 /// private/firstprivate/lastprivate vars and last_iter flag) 4858 /// \code 4859 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4860 /// lastpriv) { 4861 /// // setup lastprivate flag 4862 /// task_dst->last = lastpriv; 4863 /// // could be constructor calls here... 4864 /// } 4865 /// \endcode 4866 static llvm::Value * 4867 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4868 const OMPExecutableDirective &D, 4869 QualType KmpTaskTWithPrivatesPtrQTy, 4870 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4871 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4872 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4873 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4874 ASTContext &C = CGM.getContext(); 4875 FunctionArgList Args; 4876 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4877 KmpTaskTWithPrivatesPtrQTy, 4878 ImplicitParamDecl::Other); 4879 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4880 KmpTaskTWithPrivatesPtrQTy, 4881 ImplicitParamDecl::Other); 4882 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4883 ImplicitParamDecl::Other); 4884 Args.push_back(&DstArg); 4885 Args.push_back(&SrcArg); 4886 Args.push_back(&LastprivArg); 4887 const auto &TaskDupFnInfo = 4888 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4889 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4890 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4891 auto *TaskDup = llvm::Function::Create( 4892 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4893 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4894 TaskDup->setDoesNotRecurse(); 4895 CodeGenFunction CGF(CGM); 4896 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4897 Loc); 4898 4899 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4900 CGF.GetAddrOfLocalVar(&DstArg), 4901 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4902 // task_dst->liter = lastpriv; 4903 if (WithLastIter) { 4904 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4905 LValue Base = CGF.EmitLValueForField( 4906 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4907 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4908 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4909 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4910 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4911 } 4912 4913 // Emit initial values for private copies (if any). 4914 assert(!Privates.empty()); 4915 Address KmpTaskSharedsPtr = Address::invalid(); 4916 if (!Data.FirstprivateVars.empty()) { 4917 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4918 CGF.GetAddrOfLocalVar(&SrcArg), 4919 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4920 LValue Base = CGF.EmitLValueForField( 4921 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4922 KmpTaskSharedsPtr = Address( 4923 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4924 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4925 KmpTaskTShareds)), 4926 Loc), 4927 CGF.getNaturalTypeAlignment(SharedsTy)); 4928 } 4929 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4930 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4931 CGF.FinishFunction(); 4932 return TaskDup; 4933 } 4934 4935 /// Checks if destructor function is required to be generated. 
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // The second field of the task-with-privates record is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  // Stop at the first private field whose type needs destruction.
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Emit allocation and initialization of the runtime task object for
/// directive \p D.
///
/// Collects the private, firstprivate and lastprivate copies from \p Data,
/// builds the task record extended with those privates, emits the privates
/// mapping function and the proxy task entry, calls the task allocation
/// runtime function, copies the shareds from \p Shareds, initializes the
/// private copies, and stores the destructors function and the priority when
/// they are needed.
/// \return The allocated task, the proxy entry, the typed task pointer, the
/// kmp_task_t base lvalue, its record declaration and, when one was emitted,
/// the task duplication function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the helper variable used to initialize
  // the copy from the original.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Largest alignment first; the stable sort keeps the relative order of
  // entries with equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop and non-taskloop directives cache their record types separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are privates), cast to the
  // type of the task function's fourth parameter; otherwise pass a null
  // pointer of that type.
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  // Flags known at compile time are collected in Flags; the final flag may
  // depend on a runtime value and is combined in below.
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // Data.Final holds either a runtime condition (pointer part) or a
  // compile-time boolean (int part).
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a nowait clause the target task allocation entry point is used; it
  // takes the device id as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // View the allocated task as the task-with-privates record; TDBase is its
  // first field, the plain kmp_task_t part.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A duplication function is emitted only for taskloops that have
    // lastprivates or privates with non-trivial constructor initializers.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Emit the code that creates and launches a task for directive \p D.
///
/// The task is set up by emitTaskInit. If \p Data has dependences, an array of
/// kmp_depend_info records is built for them. With an \p IfCond both branches
/// are emitted: the true branch hands the task to the runtime, the false
/// branch waits for the dependences (if any) and calls the task entry
/// directly between the begin_if0/complete_if0 runtime calls. Without an
/// \p IfCond only the first variant is emitted.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
    // Field order of the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer with the bit width of bool.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build the kmp_depend_info record once and reuse the cached type later.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the length is the byte distance from the section's
        // start to one element past its upper bound.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
            UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // The runtime calls below receive the array as a void pointer to its
    // first element.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Only filled in, and only read by ThenCodeGen, when there are dependences.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Code emitted when the if clause is absent or evaluates to true: hand the
  // task over to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks store 0 into the part_id field of the task.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Only filled in, and only read by ElseCodeGen, when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Code emitted when the if clause evaluates to false: wait for the
  // dependences and call the task entry directly.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit the code that creates a taskloop task for directive \p D and passes it
/// to the __kmpc_taskloop runtime function.
///
/// The task is set up by emitTaskInit. The lower bound, upper bound, stride
/// and reductions fields of the task record are then initialized, and the
/// runtime is called with the value of the if clause, the schedule kind and
/// value, and the task duplication function (or null).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Without an if clause the runtime is passed 1 as if_val.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound, upper bound and stride fields of the task
  // from the initializers of the directive's corresponding helper variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument. Data.Schedule carries the schedule value
  // (pointer part) and a flag selecting NumTasks over Grainsize (int part).
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
5458 static void EmitOMPAggregateReduction( 5459 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5460 const VarDecl *RHSVar, 5461 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5462 const Expr *, const Expr *)> &RedOpGen, 5463 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5464 const Expr *UpExpr = nullptr) { 5465 // Perform element-by-element initialization. 5466 QualType ElementTy; 5467 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5468 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5469 5470 // Drill down to the base element type on both arrays. 5471 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5472 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5473 5474 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5475 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5476 // Cast from pointer to array type to pointer to single element. 5477 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5478 // The basic structure here is a while-do loop. 5479 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5480 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5481 llvm::Value *IsEmpty = 5482 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5483 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5484 5485 // Enter the loop body, making that address the current address. 
5486 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5487 CGF.EmitBlock(BodyBB); 5488 5489 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5490 5491 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5492 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5493 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5494 Address RHSElementCurrent = 5495 Address(RHSElementPHI, 5496 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5497 5498 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5499 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5500 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5501 Address LHSElementCurrent = 5502 Address(LHSElementPHI, 5503 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5504 5505 // Emit copy. 5506 CodeGenFunction::OMPPrivateScope Scope(CGF); 5507 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5508 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5509 Scope.Privatize(); 5510 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5511 Scope.ForceCleanup(); 5512 5513 // Shift the address forward by one element. 5514 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5515 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5516 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5517 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5518 // Check whether we've reached the end. 5519 llvm::Value *Done = 5520 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5521 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5522 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5523 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5524 5525 // Done. 5526 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5527 } 5528 5529 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Look for a call whose callee is an opaque value wrapping a reference to an
  // OMPDeclareReductionDecl.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          // Bind the opaque callee to the first function of the pair while the
          // call expression is emitted.
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Any other expression is emitted as is.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the internal-linkage helper
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg);
/// \endcode
/// Both arguments are cast to \p ArgsType and used as arrays from which the
/// addresses of the variables referenced by \p LHSExprs and \p RHSExprs are
/// taken. If the type of an item in \p Privates is variably modified, the
/// array slot following that item is read as the array size.
/// \param Loc Source location used for the generated function.
/// \param ArgsType LLVM type both arguments are cast to.
/// \param Privates Private copies of the reduction items; only their types are
/// inspected here.
/// \param LHSExprs References to the left-hand side variables of the
/// reduction operations.
/// \param RHSExprs References to the right-hand side variables of the
/// reduction operations.
/// \param ReductionOps Reduction operations, one per item.
/// \return The generated function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the argument arrays. It runs ahead of I because every
  // variably modified item occupies one additional slot.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The slot holds the size encoded as a pointer; convert it back to an
      // integer and bind it to the VLA size expression while the type is
      // emitted.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the combiner of every item with the remapped variables.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner \p ReductionOp for a single reduction item.
/// If \p PrivateRef has array type the combiner is emitted element by element
/// over the variables referenced by \p LHS and \p RHS, otherwise it is emitted
/// once.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code that combines the reduction items; the generated code is
/// sketched in the comment at the top of the body. Nothing is emitted if
/// \p CGF has no insert point.
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs References to the left-hand side variables of the
/// reduction operations.
/// \param RHSExprs References to the right-hand side variables of the
/// reduction operations.
/// \param ReductionOps Reduction operations, one per item.
/// \param Options WithNowait selects the *_nowait runtime entry points;
/// SimpleReduction requests that only the combiners are emitted, without any
/// runtime calls.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList. It runs ahead of I because every variably
  // modified item is followed by a slot holding its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The size is stored in the void* slot as an integer converted to a
      // pointer.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr is the assigned-to expression, UpExpr the assigned value and
      // EExpr the right operand of the binary operator found in UpExpr.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: the BO declared in the condition below shadows the opcode
      // variable above inside this if statement.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              // Callback that evaluates UpExpr with VD remapped to a
              // temporary holding the value passed in as XRValue.
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With nowait no end call is attached to the atomic case.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  // If no base declaration is reported, Ref itself must be a reference to a
  // variable.
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Local variables and parameters use their plain name, everything else the
  // mangled name.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No custom initializer: pass a null pointer as the shared item.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \return The generated function, or nullptr if reduction item \p N needs no
/// cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

/// Fills a temporary array of kmp_task_red_input_t records, one per entry of
/// Data.ReductionVars, and emits the call to __kmpc_task_reduction_init.
/// \return The result of the runtime call, or nullptr if \p CGF has no insert
/// point or there are no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null pointer is stored when no finalizer function was generated.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

/// Stores into artificial threadprivate variables the values that the
/// generated init/comb/fini helper functions load back: the size of reduction
/// item \p N if it is non-constant, and the address of the original item if a
/// custom initializer is used.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Emits a call to __kmpc_task_reduction_get_th_data for the item addressed by
/// \p SharedLVal.
/// \return The pointer returned by the runtime call, paired with the alignment
/// of \p SharedLVal.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emits a call to __kmpc_omp_taskwait and, when emitting inside an OpenMP
/// region, invokes that region's emitUntiedSwitch hook. Nothing is emitted if
/// \p CGF has no insert point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body produced by \p CodeGen for a directive of kind \p InnerKind
/// without outlining it. Nothing is emitted if \p CGF has no insert point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // NOTE(review): the RAII object presumably installs the inlined region as
  // CGF.CapturedStmtInfo for the duration of this scope - confirm against its
  // definition.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kinds produced by getCancellationKind().
/// NOTE(review): presumably these are the 'cncl_kind' values expected by the
/// runtime cancellation entry points - confirm against the runtime headers.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind of a cancel region to the matching RTCancelKind.
/// Only OMPD_parallel, OMPD_for, OMPD_sections and OMPD_taskgroup are
/// expected; any other kind trips the assertion.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
6388 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6389 llvm::Value *Args[] = { 6390 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6391 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6392 // Ignore return result until untied tasks are supported. 6393 llvm::Value *Result = CGF.EmitRuntimeCall( 6394 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6395 // if (__kmpc_cancellationpoint()) { 6396 // exit from construct; 6397 // } 6398 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6399 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6400 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6401 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6402 CGF.EmitBlock(ExitBB); 6403 // exit from construct; 6404 CodeGenFunction::JumpDest CancelDest = 6405 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6406 CGF.EmitBranchThroughCleanup(CancelDest); 6407 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6408 } 6409 } 6410 } 6411 6412 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6413 const Expr *IfCond, 6414 OpenMPDirectiveKind CancelRegion) { 6415 if (!CGF.HaveInsertPoint()) 6416 return; 6417 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6418 // kmp_int32 cncl_kind); 6419 if (auto *OMPRegionInfo = 6420 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6421 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6422 PrePostActionTy &) { 6423 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6424 llvm::Value *Args[] = { 6425 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6426 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6427 // Ignore return result until untied tasks are supported. 
6428 llvm::Value *Result = CGF.EmitRuntimeCall( 6429 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6430 // if (__kmpc_cancel()) { 6431 // exit from construct; 6432 // } 6433 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6434 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6435 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6436 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6437 CGF.EmitBlock(ExitBB); 6438 // exit from construct; 6439 CodeGenFunction::JumpDest CancelDest = 6440 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6441 CGF.EmitBranchThroughCleanup(CancelDest); 6442 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6443 }; 6444 if (IfCond) { 6445 emitIfClause(CGF, IfCond, ThenGen, 6446 [](CodeGenFunction &, PrePostActionTy &) {}); 6447 } else { 6448 RegionCodeGenTy ThenRCG(ThenGen); 6449 ThenRCG(CGF); 6450 } 6451 } 6452 } 6453 6454 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6455 const OMPExecutableDirective &D, StringRef ParentName, 6456 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6457 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6458 assert(!ParentName.empty() && "Invalid target region parent name!"); 6459 HasEmittedTargetRegion = true; 6460 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6461 IsOffloadEntry, CodeGen); 6462 } 6463 6464 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6465 const OMPExecutableDirective &D, StringRef ParentName, 6466 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6467 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6468 // Create a unique name for the entry function using the source location 6469 // information of the current target region. 
The name will be something like: 6470 // 6471 // __omp_offloading_DD_FFFF_PP_lBB 6472 // 6473 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6474 // mangled name of the function that encloses the target region and BB is the 6475 // line number of the target region. 6476 6477 unsigned DeviceID; 6478 unsigned FileID; 6479 unsigned Line; 6480 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6481 Line); 6482 SmallString<64> EntryFnName; 6483 { 6484 llvm::raw_svector_ostream OS(EntryFnName); 6485 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6486 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6487 } 6488 6489 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6490 6491 CodeGenFunction CGF(CGM, true); 6492 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6493 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6494 6495 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); 6496 6497 // If this target outline function is not an offload entry, we don't need to 6498 // register it. 6499 if (!IsOffloadEntry) 6500 return; 6501 6502 // The target region ID is used by the runtime library to identify the current 6503 // target region, so it only has to be unique and not necessarily point to 6504 // anything. It could be the pointer to the outlined function that implements 6505 // the target region, but we aren't using that so that the compiler doesn't 6506 // need to keep that, and could therefore inline the host function if proven 6507 // worthwhile during optimization. In the other hand, if emitting code for the 6508 // device, the ID has to be the function address so that it can retrieved from 6509 // the offloading entry and launched by the runtime library. We also mark the 6510 // outlined function to have external linkage in case we are emitting code for 6511 // the device, because these functions will be entry points to the device. 
6512 6513 if (CGM.getLangOpts().OpenMPIsDevice) { 6514 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6515 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6516 OutlinedFn->setDSOLocal(false); 6517 } else { 6518 std::string Name = getName({EntryFnName, "region_id"}); 6519 OutlinedFnID = new llvm::GlobalVariable( 6520 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6521 llvm::GlobalValue::WeakAnyLinkage, 6522 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6523 } 6524 6525 // Register the information for the entry associated with this target region. 6526 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6527 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6528 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6529 } 6530 6531 /// Checks if the expression is constant or does not have non-trivial function 6532 /// calls. 6533 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6534 // We can skip constant expressions. 6535 // We can skip expressions with trivial calls or simple expressions. 6536 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6537 !E->hasNonTrivialCall(Ctx)) && 6538 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6539 } 6540 6541 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6542 const Stmt *Body) { 6543 const Stmt *Child = Body->IgnoreContainers(); 6544 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6545 Child = nullptr; 6546 for (const Stmt *S : C->body()) { 6547 if (const auto *E = dyn_cast<Expr>(S)) { 6548 if (isTrivial(Ctx, E)) 6549 continue; 6550 } 6551 // Some of the statements can be ignored. 6552 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6553 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6554 continue; 6555 // Analyze declarations. 
6556 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6557 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6558 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6559 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6560 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6561 isa<UsingDirectiveDecl>(D) || 6562 isa<OMPDeclareReductionDecl>(D) || 6563 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6564 return true; 6565 const auto *VD = dyn_cast<VarDecl>(D); 6566 if (!VD) 6567 return false; 6568 return VD->isConstexpr() || 6569 ((VD->getType().isTrivialType(Ctx) || 6570 VD->getType()->isReferenceType()) && 6571 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6572 })) 6573 continue; 6574 } 6575 // Found multiple children - cannot get the one child only. 6576 if (Child) 6577 return nullptr; 6578 Child = S; 6579 } 6580 if (Child) 6581 Child = Child->IgnoreContainers(); 6582 } 6583 return Child; 6584 } 6585 6586 /// Emit the number of teams for a target directive. Inspect the num_teams 6587 /// clause associated with a teams construct combined or closely nested 6588 /// with the target directive. 6589 /// 6590 /// Emit a team of size one for directives such as 'target parallel' that 6591 /// have no associated teams construct. 6592 /// 6593 /// Otherwise, return nullptr. 
6594 static llvm::Value * 6595 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6596 const OMPExecutableDirective &D) { 6597 assert(!CGF.getLangOpts().OpenMPIsDevice && 6598 "Clauses associated with the teams directive expected to be emitted " 6599 "only for the host!"); 6600 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6601 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6602 "Expected target-based executable directive."); 6603 CGBuilderTy &Bld = CGF.Builder; 6604 switch (DirectiveKind) { 6605 case OMPD_target: { 6606 const auto *CS = D.getInnermostCapturedStmt(); 6607 const auto *Body = 6608 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6609 const Stmt *ChildStmt = 6610 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6611 if (const auto *NestedDir = 6612 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6613 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6614 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6615 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6616 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6617 const Expr *NumTeams = 6618 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6619 llvm::Value *NumTeamsVal = 6620 CGF.EmitScalarExpr(NumTeams, 6621 /*IgnoreResultAssign*/ true); 6622 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6623 /*isSigned=*/true); 6624 } 6625 return Bld.getInt32(0); 6626 } 6627 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6628 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6629 return Bld.getInt32(1); 6630 return Bld.getInt32(0); 6631 } 6632 return nullptr; 6633 } 6634 case OMPD_target_teams: 6635 case OMPD_target_teams_distribute: 6636 case OMPD_target_teams_distribute_simd: 6637 case OMPD_target_teams_distribute_parallel_for: 6638 case OMPD_target_teams_distribute_parallel_for_simd: { 6639 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6640 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6641 const Expr *NumTeams = 6642 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6643 llvm::Value *NumTeamsVal = 6644 CGF.EmitScalarExpr(NumTeams, 6645 /*IgnoreResultAssign*/ true); 6646 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6647 /*isSigned=*/true); 6648 } 6649 return Bld.getInt32(0); 6650 } 6651 case OMPD_target_parallel: 6652 case OMPD_target_parallel_for: 6653 case OMPD_target_parallel_for_simd: 6654 case OMPD_target_simd: 6655 return Bld.getInt32(1); 6656 case OMPD_parallel: 6657 case OMPD_for: 6658 case OMPD_parallel_for: 6659 case OMPD_parallel_master: 6660 case OMPD_parallel_sections: 6661 case OMPD_for_simd: 6662 case OMPD_parallel_for_simd: 6663 case OMPD_cancel: 6664 case OMPD_cancellation_point: 6665 case OMPD_ordered: 6666 case OMPD_threadprivate: 6667 case OMPD_allocate: 6668 case OMPD_task: 6669 case OMPD_simd: 6670 case OMPD_sections: 6671 case OMPD_section: 6672 case OMPD_single: 6673 case OMPD_master: 6674 case OMPD_critical: 6675 case OMPD_taskyield: 6676 case OMPD_barrier: 6677 case OMPD_taskwait: 6678 case OMPD_taskgroup: 6679 case OMPD_atomic: 6680 case OMPD_flush: 6681 case OMPD_teams: 6682 case OMPD_target_data: 6683 case OMPD_target_exit_data: 6684 case OMPD_target_enter_data: 6685 case OMPD_distribute: 6686 case OMPD_distribute_simd: 6687 case OMPD_distribute_parallel_for: 6688 case OMPD_distribute_parallel_for_simd: 6689 case OMPD_teams_distribute: 6690 case OMPD_teams_distribute_simd: 6691 case OMPD_teams_distribute_parallel_for: 6692 case OMPD_teams_distribute_parallel_for_simd: 6693 case OMPD_target_update: 6694 case OMPD_declare_simd: 6695 case OMPD_declare_variant: 6696 case OMPD_declare_target: 6697 case OMPD_end_declare_target: 6698 case OMPD_declare_reduction: 6699 case OMPD_declare_mapper: 6700 case OMPD_taskloop: 6701 case OMPD_taskloop_simd: 6702 case OMPD_master_taskloop: 6703 case OMPD_master_taskloop_simd: 6704 case OMPD_parallel_master_taskloop: 6705 case 
OMPD_parallel_master_taskloop_simd: 6706 case OMPD_requires: 6707 case OMPD_unknown: 6708 break; 6709 } 6710 llvm_unreachable("Unexpected directive kind."); 6711 } 6712 6713 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6714 llvm::Value *DefaultThreadLimitVal) { 6715 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6716 CGF.getContext(), CS->getCapturedStmt()); 6717 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6718 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6719 llvm::Value *NumThreads = nullptr; 6720 llvm::Value *CondVal = nullptr; 6721 // Handle if clause. If if clause present, the number of threads is 6722 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6723 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6724 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6725 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6726 const OMPIfClause *IfClause = nullptr; 6727 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6728 if (C->getNameModifier() == OMPD_unknown || 6729 C->getNameModifier() == OMPD_parallel) { 6730 IfClause = C; 6731 break; 6732 } 6733 } 6734 if (IfClause) { 6735 const Expr *Cond = IfClause->getCondition(); 6736 bool Result; 6737 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6738 if (!Result) 6739 return CGF.Builder.getInt32(1); 6740 } else { 6741 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6742 if (const auto *PreInit = 6743 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6744 for (const auto *I : PreInit->decls()) { 6745 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6746 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6747 } else { 6748 CodeGenFunction::AutoVarEmission Emission = 6749 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6750 CGF.EmitAutoVarCleanups(Emission); 6751 } 6752 } 6753 } 6754 CondVal = CGF.EvaluateExprAsBool(Cond); 6755 } 6756 } 6757 } 6758 // Check the value of num_threads clause iff if 
clause was not specified 6759 // or is not evaluated to false. 6760 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6761 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6762 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6763 const auto *NumThreadsClause = 6764 Dir->getSingleClause<OMPNumThreadsClause>(); 6765 CodeGenFunction::LexicalScope Scope( 6766 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6767 if (const auto *PreInit = 6768 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6769 for (const auto *I : PreInit->decls()) { 6770 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6771 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6772 } else { 6773 CodeGenFunction::AutoVarEmission Emission = 6774 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6775 CGF.EmitAutoVarCleanups(Emission); 6776 } 6777 } 6778 } 6779 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6780 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6781 /*isSigned=*/false); 6782 if (DefaultThreadLimitVal) 6783 NumThreads = CGF.Builder.CreateSelect( 6784 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6785 DefaultThreadLimitVal, NumThreads); 6786 } else { 6787 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6788 : CGF.Builder.getInt32(0); 6789 } 6790 // Process condition of the if clause. 6791 if (CondVal) { 6792 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6793 CGF.Builder.getInt32(1)); 6794 } 6795 return NumThreads; 6796 } 6797 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6798 return CGF.Builder.getInt32(1); 6799 return DefaultThreadLimitVal; 6800 } 6801 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6802 : CGF.Builder.getInt32(0); 6803 } 6804 6805 /// Emit the number of threads for a target directive. Inspect the 6806 /// thread_limit clause associated with a teams construct combined or closely 6807 /// nested with the target directive. 
6808 /// 6809 /// Emit the num_threads clause for directives such as 'target parallel' that 6810 /// have no associated teams construct. 6811 /// 6812 /// Otherwise, return nullptr. 6813 static llvm::Value * 6814 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6815 const OMPExecutableDirective &D) { 6816 assert(!CGF.getLangOpts().OpenMPIsDevice && 6817 "Clauses associated with the teams directive expected to be emitted " 6818 "only for the host!"); 6819 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6820 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6821 "Expected target-based executable directive."); 6822 CGBuilderTy &Bld = CGF.Builder; 6823 llvm::Value *ThreadLimitVal = nullptr; 6824 llvm::Value *NumThreadsVal = nullptr; 6825 switch (DirectiveKind) { 6826 case OMPD_target: { 6827 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6828 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6829 return NumThreads; 6830 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6831 CGF.getContext(), CS->getCapturedStmt()); 6832 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6833 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6834 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6835 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6836 const auto *ThreadLimitClause = 6837 Dir->getSingleClause<OMPThreadLimitClause>(); 6838 CodeGenFunction::LexicalScope Scope( 6839 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6840 if (const auto *PreInit = 6841 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6842 for (const auto *I : PreInit->decls()) { 6843 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6844 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6845 } else { 6846 CodeGenFunction::AutoVarEmission Emission = 6847 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6848 CGF.EmitAutoVarCleanups(Emission); 6849 } 6850 } 6851 } 6852 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6853 
ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6854 ThreadLimitVal = 6855 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6856 } 6857 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6858 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6859 CS = Dir->getInnermostCapturedStmt(); 6860 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6861 CGF.getContext(), CS->getCapturedStmt()); 6862 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6863 } 6864 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6865 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6866 CS = Dir->getInnermostCapturedStmt(); 6867 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6868 return NumThreads; 6869 } 6870 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6871 return Bld.getInt32(1); 6872 } 6873 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6874 } 6875 case OMPD_target_teams: { 6876 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6877 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6878 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6879 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6880 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6881 ThreadLimitVal = 6882 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6883 } 6884 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6885 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6886 return NumThreads; 6887 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6888 CGF.getContext(), CS->getCapturedStmt()); 6889 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6890 if (Dir->getDirectiveKind() == OMPD_distribute) { 6891 CS = Dir->getInnermostCapturedStmt(); 6892 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6893 return NumThreads; 6894 } 6895 } 6896 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6897 } 6898 case OMPD_target_teams_distribute: 6899 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6900 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6901 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6902 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6903 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6904 ThreadLimitVal = 6905 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6906 } 6907 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6908 case OMPD_target_parallel: 6909 case OMPD_target_parallel_for: 6910 case OMPD_target_parallel_for_simd: 6911 case OMPD_target_teams_distribute_parallel_for: 6912 case OMPD_target_teams_distribute_parallel_for_simd: { 6913 llvm::Value *CondVal = nullptr; 6914 // Handle if clause. If if clause present, the number of threads is 6915 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6916 if (D.hasClausesOfKind<OMPIfClause>()) { 6917 const OMPIfClause *IfClause = nullptr; 6918 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6919 if (C->getNameModifier() == OMPD_unknown || 6920 C->getNameModifier() == OMPD_parallel) { 6921 IfClause = C; 6922 break; 6923 } 6924 } 6925 if (IfClause) { 6926 const Expr *Cond = IfClause->getCondition(); 6927 bool Result; 6928 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6929 if (!Result) 6930 return Bld.getInt32(1); 6931 } else { 6932 CodeGenFunction::RunCleanupsScope Scope(CGF); 6933 CondVal = CGF.EvaluateExprAsBool(Cond); 6934 } 6935 } 6936 } 6937 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6938 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6939 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6940 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6941 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6942 ThreadLimitVal = 6943 Bld.CreateIntCast(ThreadLimit, 
CGF.Int32Ty, /*isSigned=*/false); 6944 } 6945 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6946 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6947 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6948 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6949 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6950 NumThreadsVal = 6951 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6952 ThreadLimitVal = ThreadLimitVal 6953 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6954 ThreadLimitVal), 6955 NumThreadsVal, ThreadLimitVal) 6956 : NumThreadsVal; 6957 } 6958 if (!ThreadLimitVal) 6959 ThreadLimitVal = Bld.getInt32(0); 6960 if (CondVal) 6961 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6962 return ThreadLimitVal; 6963 } 6964 case OMPD_target_teams_distribute_simd: 6965 case OMPD_target_simd: 6966 return Bld.getInt32(1); 6967 case OMPD_parallel: 6968 case OMPD_for: 6969 case OMPD_parallel_for: 6970 case OMPD_parallel_master: 6971 case OMPD_parallel_sections: 6972 case OMPD_for_simd: 6973 case OMPD_parallel_for_simd: 6974 case OMPD_cancel: 6975 case OMPD_cancellation_point: 6976 case OMPD_ordered: 6977 case OMPD_threadprivate: 6978 case OMPD_allocate: 6979 case OMPD_task: 6980 case OMPD_simd: 6981 case OMPD_sections: 6982 case OMPD_section: 6983 case OMPD_single: 6984 case OMPD_master: 6985 case OMPD_critical: 6986 case OMPD_taskyield: 6987 case OMPD_barrier: 6988 case OMPD_taskwait: 6989 case OMPD_taskgroup: 6990 case OMPD_atomic: 6991 case OMPD_flush: 6992 case OMPD_teams: 6993 case OMPD_target_data: 6994 case OMPD_target_exit_data: 6995 case OMPD_target_enter_data: 6996 case OMPD_distribute: 6997 case OMPD_distribute_simd: 6998 case OMPD_distribute_parallel_for: 6999 case OMPD_distribute_parallel_for_simd: 7000 case OMPD_teams_distribute: 7001 case OMPD_teams_distribute_simd: 7002 case OMPD_teams_distribute_parallel_for: 7003 case OMPD_teams_distribute_parallel_for_simd: 7004 
case OMPD_target_update: 7005 case OMPD_declare_simd: 7006 case OMPD_declare_variant: 7007 case OMPD_declare_target: 7008 case OMPD_end_declare_target: 7009 case OMPD_declare_reduction: 7010 case OMPD_declare_mapper: 7011 case OMPD_taskloop: 7012 case OMPD_taskloop_simd: 7013 case OMPD_master_taskloop: 7014 case OMPD_master_taskloop_simd: 7015 case OMPD_parallel_master_taskloop: 7016 case OMPD_parallel_master_taskloop_simd: 7017 case OMPD_requires: 7018 case OMPD_unknown: 7019 break; 7020 } 7021 llvm_unreachable("Unsupported directive kind."); 7022 } 7023 7024 namespace { 7025 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7026 7027 // Utility to handle information from clauses associated with a given 7028 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7029 // It provides a convenient interface to obtain the information and generate 7030 // code for that information. 7031 class MappableExprsHandler { 7032 public: 7033 /// Values for bit flags used to specify the mapping type for 7034 /// offloading. 7035 enum OpenMPOffloadMappingFlags : uint64_t { 7036 /// No flags 7037 OMP_MAP_NONE = 0x0, 7038 /// Allocate memory on the device and move data from host to device. 7039 OMP_MAP_TO = 0x01, 7040 /// Allocate memory on the device and move data from device to host. 7041 OMP_MAP_FROM = 0x02, 7042 /// Always perform the requested mapping action on the element, even 7043 /// if it was already mapped before. 7044 OMP_MAP_ALWAYS = 0x04, 7045 /// Delete the element from the device environment, ignoring the 7046 /// current reference count associated with the element. 7047 OMP_MAP_DELETE = 0x08, 7048 /// The element being mapped is a pointer-pointee pair; both the 7049 /// pointer and the pointee should be mapped. 7050 OMP_MAP_PTR_AND_OBJ = 0x10, 7051 /// This flags signals that the base address of an entry should be 7052 /// passed to the target kernel as an argument. 
7053 OMP_MAP_TARGET_PARAM = 0x20, 7054 /// Signal that the runtime library has to return the device pointer 7055 /// in the current position for the data being mapped. Used when we have the 7056 /// use_device_ptr clause. 7057 OMP_MAP_RETURN_PARAM = 0x40, 7058 /// This flag signals that the reference being passed is a pointer to 7059 /// private data. 7060 OMP_MAP_PRIVATE = 0x80, 7061 /// Pass the element to the device by value. 7062 OMP_MAP_LITERAL = 0x100, 7063 /// Implicit map 7064 OMP_MAP_IMPLICIT = 0x200, 7065 /// Close is a hint to the runtime to allocate memory close to 7066 /// the target device. 7067 OMP_MAP_CLOSE = 0x400, 7068 /// The 16 MSBs of the flags indicate whether the entry is member of some 7069 /// struct/class. 7070 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7071 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7072 }; 7073 7074 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7075 static unsigned getFlagMemberOffset() { 7076 unsigned Offset = 0; 7077 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7078 Remain = Remain >> 1) 7079 Offset++; 7080 return Offset; 7081 } 7082 7083 /// Class that associates information with a base pointer to be passed to the 7084 /// runtime library. 7085 class BasePointerInfo { 7086 /// The base pointer. 7087 llvm::Value *Ptr = nullptr; 7088 /// The base declaration that refers to this device pointer, or null if 7089 /// there is none. 
7090 const ValueDecl *DevPtrDecl = nullptr; 7091 7092 public: 7093 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7094 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7095 llvm::Value *operator*() const { return Ptr; } 7096 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7097 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7098 }; 7099 7100 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7101 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7102 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7103 7104 /// Map between a struct and the its lowest & highest elements which have been 7105 /// mapped. 7106 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7107 /// HE(FieldIndex, Pointer)} 7108 struct StructRangeInfoTy { 7109 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7110 0, Address::invalid()}; 7111 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7112 0, Address::invalid()}; 7113 Address Base = Address::invalid(); 7114 }; 7115 7116 private: 7117 /// Kind that defines how a device pointer has to be returned. 
7118 struct MapInfo { 7119 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7120 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7121 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7122 bool ReturnDevicePointer = false; 7123 bool IsImplicit = false; 7124 7125 MapInfo() = default; 7126 MapInfo( 7127 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7128 OpenMPMapClauseKind MapType, 7129 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7130 bool ReturnDevicePointer, bool IsImplicit) 7131 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7132 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7133 }; 7134 7135 /// If use_device_ptr is used on a pointer which is a struct member and there 7136 /// is no map information about it, then emission of that entry is deferred 7137 /// until the whole struct has been processed. 7138 struct DeferredDevicePtrEntryTy { 7139 const Expr *IE = nullptr; 7140 const ValueDecl *VD = nullptr; 7141 7142 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7143 : IE(IE), VD(VD) {} 7144 }; 7145 7146 /// The target directive from where the mappable clauses were extracted. It 7147 /// is either a executable directive or a user-defined mapper directive. 7148 llvm::PointerUnion<const OMPExecutableDirective *, 7149 const OMPDeclareMapperDecl *> 7150 CurDir; 7151 7152 /// Function the directive is being generated for. 7153 CodeGenFunction &CGF; 7154 7155 /// Set of all first private variables in the current directive. 7156 /// bool data is set to true if the variable is implicitly marked as 7157 /// firstprivate, false otherwise. 7158 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7159 7160 /// Map between device pointer declarations and their expression components. 7161 /// The key value for declarations in 'this' is null. 
7162 llvm::DenseMap< 7163 const ValueDecl *, 7164 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7165 DevPointersMap; 7166 7167 llvm::Value *getExprTypeSize(const Expr *E) const { 7168 QualType ExprTy = E->getType().getCanonicalType(); 7169 7170 // Reference types are ignored for mapping purposes. 7171 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7172 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7173 7174 // Given that an array section is considered a built-in type, we need to 7175 // do the calculation based on the length of the section instead of relying 7176 // on CGF.getTypeSize(E->getType()). 7177 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7178 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7179 OAE->getBase()->IgnoreParenImpCasts()) 7180 .getCanonicalType(); 7181 7182 // If there is no length associated with the expression and lower bound is 7183 // not specified too, that means we are using the whole length of the 7184 // base. 7185 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7186 !OAE->getLowerBound()) 7187 return CGF.getTypeSize(BaseTy); 7188 7189 llvm::Value *ElemSize; 7190 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7191 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7192 } else { 7193 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7194 assert(ATy && "Expecting array type if not a pointer type."); 7195 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7196 } 7197 7198 // If we don't have a length at this point, that is because we have an 7199 // array section with a single element. 
7200 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7201 return ElemSize; 7202 7203 if (const Expr *LenExpr = OAE->getLength()) { 7204 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7205 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7206 CGF.getContext().getSizeType(), 7207 LenExpr->getExprLoc()); 7208 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7209 } 7210 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7211 OAE->getLowerBound() && "expected array_section[lb:]."); 7212 // Size = sizetype - lb * elemtype; 7213 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7214 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7215 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7216 CGF.getContext().getSizeType(), 7217 OAE->getLowerBound()->getExprLoc()); 7218 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7219 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7220 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7221 LengthVal = CGF.Builder.CreateSelect( 7222 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7223 return LengthVal; 7224 } 7225 return CGF.getTypeSize(ExprTy); 7226 } 7227 7228 /// Return the corresponding bits for a given map clause modifier. Add 7229 /// a flag marking the map as a pointer if requested. Add a flag marking the 7230 /// map as the first one of a series of maps that relate to the same map 7231 /// expression. 7232 OpenMPOffloadMappingFlags getMapTypeBits( 7233 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7234 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7235 OpenMPOffloadMappingFlags Bits = 7236 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7237 switch (MapType) { 7238 case OMPC_MAP_alloc: 7239 case OMPC_MAP_release: 7240 // alloc and release is the default behavior in the runtime library, i.e. 
7241 // if we don't pass any bits alloc/release that is what the runtime is 7242 // going to do. Therefore, we don't need to signal anything for these two 7243 // type modifiers. 7244 break; 7245 case OMPC_MAP_to: 7246 Bits |= OMP_MAP_TO; 7247 break; 7248 case OMPC_MAP_from: 7249 Bits |= OMP_MAP_FROM; 7250 break; 7251 case OMPC_MAP_tofrom: 7252 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7253 break; 7254 case OMPC_MAP_delete: 7255 Bits |= OMP_MAP_DELETE; 7256 break; 7257 case OMPC_MAP_unknown: 7258 llvm_unreachable("Unexpected map type!"); 7259 } 7260 if (AddPtrFlag) 7261 Bits |= OMP_MAP_PTR_AND_OBJ; 7262 if (AddIsTargetParamFlag) 7263 Bits |= OMP_MAP_TARGET_PARAM; 7264 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7265 != MapModifiers.end()) 7266 Bits |= OMP_MAP_ALWAYS; 7267 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7268 != MapModifiers.end()) 7269 Bits |= OMP_MAP_CLOSE; 7270 return Bits; 7271 } 7272 7273 /// Return true if the provided expression is a final array section. A 7274 /// final array section, is one whose length can't be proved to be one. 7275 bool isFinalArraySectionExpression(const Expr *E) const { 7276 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7277 7278 // It is not an array section and therefore not a unity-size one. 7279 if (!OASE) 7280 return false; 7281 7282 // An array section with no colon always refer to a single element. 7283 if (OASE->getColonLoc().isInvalid()) 7284 return false; 7285 7286 const Expr *Length = OASE->getLength(); 7287 7288 // If we don't have a length we have to check if the array has size 1 7289 // for this dimension. Also, we should always expect a length if the 7290 // base type is pointer. 
7291 if (!Length) { 7292 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7293 OASE->getBase()->IgnoreParenImpCasts()) 7294 .getCanonicalType(); 7295 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7296 return ATy->getSize().getSExtValue() != 1; 7297 // If we don't have a constant dimension length, we have to consider 7298 // the current section as having any size, so it is not necessarily 7299 // unitary. If it happen to be unity size, that's user fault. 7300 return true; 7301 } 7302 7303 // Check if the length evaluates to 1. 7304 Expr::EvalResult Result; 7305 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7306 return true; // Can have more that size 1. 7307 7308 llvm::APSInt ConstLength = Result.Val.getInt(); 7309 return ConstLength.getSExtValue() != 1; 7310 } 7311 7312 /// Generate the base pointers, section pointers, sizes and map type 7313 /// bits for the provided map type, map modifier, and expression components. 7314 /// \a IsFirstComponent should be set to true if the provided set of 7315 /// components is the first associated with a capture. 7316 void generateInfoForComponentList( 7317 OpenMPMapClauseKind MapType, 7318 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7319 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7320 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7321 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7322 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7323 bool IsImplicit, 7324 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7325 OverlappedElements = llvm::None) const { 7326 // The following summarizes what has to be generated for each map and the 7327 // types below. The generated information is expressed in this order: 7328 // base pointer, section pointer, size, flags 7329 // (to add to the ones that come from the map type and modifier). 
7330 // 7331 // double d; 7332 // int i[100]; 7333 // float *p; 7334 // 7335 // struct S1 { 7336 // int i; 7337 // float f[50]; 7338 // } 7339 // struct S2 { 7340 // int i; 7341 // float f[50]; 7342 // S1 s; 7343 // double *p; 7344 // struct S2 *ps; 7345 // } 7346 // S2 s; 7347 // S2 *ps; 7348 // 7349 // map(d) 7350 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7351 // 7352 // map(i) 7353 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7354 // 7355 // map(i[1:23]) 7356 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7357 // 7358 // map(p) 7359 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7360 // 7361 // map(p[1:24]) 7362 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7363 // 7364 // map(s) 7365 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7366 // 7367 // map(s.i) 7368 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7369 // 7370 // map(s.s.f) 7371 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7372 // 7373 // map(s.p) 7374 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7375 // 7376 // map(to: s.p[:22]) 7377 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7378 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7379 // &(s.p), &(s.p[0]), 22*sizeof(double), 7380 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7381 // (*) alloc space for struct members, only this is a target parameter 7382 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7383 // optimizes this entry out, same in the examples below) 7384 // (***) map the pointee (map: to) 7385 // 7386 // map(s.ps) 7387 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7388 // 7389 // map(from: s.ps->s.i) 7390 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7391 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7392 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7393 // 7394 // map(to: s.ps->ps) 7395 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7396 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7397 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7398 // 7399 // map(s.ps->ps->ps) 7400 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7401 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7402 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7403 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7404 // 7405 // map(to: s.ps->ps->s.f[:22]) 7406 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7407 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7408 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7409 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7410 // 7411 // map(ps) 7412 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7413 // 7414 // map(ps->i) 7415 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7416 // 7417 // map(ps->s.f) 7418 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7419 // 7420 // map(from: ps->p) 7421 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7422 // 7423 // map(to: ps->p[:22]) 7424 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7425 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7426 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7427 // 7428 // map(ps->ps) 7429 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7430 // 7431 // map(from: ps->ps->s.i) 7432 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7433 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7434 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7435 // 7436 // map(from: ps->ps->ps) 7437 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7438 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7439 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7440 // 7441 // map(ps->ps->ps->ps) 7442 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7443 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7444 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7445 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7446 // 7447 // map(to: ps->ps->ps->s.f[:22]) 7448 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7449 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7450 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7451 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7452 // 7453 // map(to: s.f[:22]) map(from: s.p[:33]) 7454 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7455 // sizeof(double*) (**), TARGET_PARAM 7456 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7457 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7458 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7459 // (*) allocate contiguous space needed to fit all mapped members even if 7460 // we allocate space for members not mapped (in this example, 7461 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7462 // them as well because they fall between &s.f[0] and &s.p) 7463 // 7464 // map(from: s.f[:22]) map(to: ps->p[:33]) 7465 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7466 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7467 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7468 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7469 // (*) the struct this entry pertains to is the 2nd element in the list of 7470 // arguments, hence MEMBER_OF(2) 7471 // 7472 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7473 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7474 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7475 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7476 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7477 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7478 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7479 // (*) the struct this entry pertains to is the 4th element in the list 7480 // of arguments, hence MEMBER_OF(4) 7481 7482 // Track if the map information being generated is the first for a 
capture. 7483 bool IsCaptureFirstInfo = IsFirstComponentList; 7484 // When the variable is on a declare target link or in a to clause with 7485 // unified memory, a reference is needed to hold the host/device address 7486 // of the variable. 7487 bool RequiresReference = false; 7488 7489 // Scan the components from the base to the complete expression. 7490 auto CI = Components.rbegin(); 7491 auto CE = Components.rend(); 7492 auto I = CI; 7493 7494 // Track if the map information being generated is the first for a list of 7495 // components. 7496 bool IsExpressionFirstInfo = true; 7497 Address BP = Address::invalid(); 7498 const Expr *AssocExpr = I->getAssociatedExpression(); 7499 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7500 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7501 7502 if (isa<MemberExpr>(AssocExpr)) { 7503 // The base is the 'this' pointer. The content of the pointer is going 7504 // to be the base of the field being mapped. 7505 BP = CGF.LoadCXXThisAddress(); 7506 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7507 (OASE && 7508 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7509 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7510 } else { 7511 // The base is the reference to the variable. 7512 // BP = &Var. 7513 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7514 if (const auto *VD = 7515 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7516 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7517 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7518 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7519 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7520 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7521 RequiresReference = true; 7522 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7523 } 7524 } 7525 } 7526 7527 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7528 // the last component), the base has to be the pointer itself, not its 7529 // reference. References are ignored for mapping purposes. 7530 QualType Ty = 7531 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7532 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7533 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7534 7535 // We do not need to generate individual map information for the 7536 // pointer, it can be associated with the combined storage. 7537 ++I; 7538 } 7539 } 7540 7541 // Track whether a component of the list should be marked as MEMBER_OF some 7542 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7543 // in a component list should be marked as MEMBER_OF, all subsequent entries 7544 // do not belong to the base struct. E.g. 7545 // struct S2 s; 7546 // s.ps->ps->ps->f[:] 7547 // (1) (2) (3) (4) 7548 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7549 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7550 // is the pointee of ps(2) which is not member of struct s, so it should not 7551 // be marked as such (it is still PTR_AND_OBJ). 7552 // The variable is initialized to false so that PTR_AND_OBJ entries which 7553 // are not struct members are not considered (e.g. array of pointers to 7554 // data). 7555 bool ShouldBeMemberOf = false; 7556 7557 // Variable keeping track of whether or not we have encountered a component 7558 // in the component list which is a member expression. Useful when we have a 7559 // pointer or a final array section, in which case it is the previous 7560 // component in the list which tells us whether we have a member expression. 7561 // E.g. X.f[:] 7562 // While processing the final array section "[:]" it is "f" which tells us 7563 // whether we are dealing with a member of a declared struct. 
7564 const MemberExpr *EncounteredME = nullptr; 7565 7566 for (; I != CE; ++I) { 7567 // If the current component is member of a struct (parent struct) mark it. 7568 if (!EncounteredME) { 7569 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7570 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7571 // as MEMBER_OF the parent struct. 7572 if (EncounteredME) 7573 ShouldBeMemberOf = true; 7574 } 7575 7576 auto Next = std::next(I); 7577 7578 // We need to generate the addresses and sizes if this is the last 7579 // component, if the component is a pointer or if it is an array section 7580 // whose length can't be proved to be one. If this is a pointer, it 7581 // becomes the base address for the following components. 7582 7583 // A final array section, is one whose length can't be proved to be one. 7584 bool IsFinalArraySection = 7585 isFinalArraySectionExpression(I->getAssociatedExpression()); 7586 7587 // Get information on whether the element is a pointer. Have to do a 7588 // special treatment for array sections given that they are built-in 7589 // types. 7590 const auto *OASE = 7591 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7592 bool IsPointer = 7593 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7594 .getCanonicalType() 7595 ->isAnyPointerType()) || 7596 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7597 7598 if (Next == CE || IsPointer || IsFinalArraySection) { 7599 // If this is not the last component, we expect the pointer to be 7600 // associated with an array expression or member expression. 
7601 assert((Next == CE || 7602 isa<MemberExpr>(Next->getAssociatedExpression()) || 7603 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7604 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7605 "Unexpected expression"); 7606 7607 Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7608 .getAddress(CGF); 7609 7610 // If this component is a pointer inside the base struct then we don't 7611 // need to create any entry for it - it will be combined with the object 7612 // it is pointing to into a single PTR_AND_OBJ entry. 7613 bool IsMemberPointer = 7614 IsPointer && EncounteredME && 7615 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7616 EncounteredME); 7617 if (!OverlappedElements.empty()) { 7618 // Handle base element with the info for overlapped elements. 7619 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7620 assert(Next == CE && 7621 "Expected last element for the overlapped elements."); 7622 assert(!IsPointer && 7623 "Unexpected base element with the pointer type."); 7624 // Mark the whole struct as the struct that requires allocation on the 7625 // device. 7626 PartialStruct.LowestElem = {0, LB}; 7627 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7628 I->getAssociatedExpression()->getType()); 7629 Address HB = CGF.Builder.CreateConstGEP( 7630 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7631 CGF.VoidPtrTy), 7632 TypeSize.getQuantity() - 1); 7633 PartialStruct.HighestElem = { 7634 std::numeric_limits<decltype( 7635 PartialStruct.HighestElem.first)>::max(), 7636 HB}; 7637 PartialStruct.Base = BP; 7638 // Emit data for non-overlapped data. 7639 OpenMPOffloadMappingFlags Flags = 7640 OMP_MAP_MEMBER_OF | 7641 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7642 /*AddPtrFlag=*/false, 7643 /*AddIsTargetParamFlag=*/false); 7644 LB = BP; 7645 llvm::Value *Size = nullptr; 7646 // Do bitcopy of all non-overlapped structure elements. 
7647 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7648 Component : OverlappedElements) { 7649 Address ComponentLB = Address::invalid(); 7650 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7651 Component) { 7652 if (MC.getAssociatedDeclaration()) { 7653 ComponentLB = 7654 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7655 .getAddress(CGF); 7656 Size = CGF.Builder.CreatePtrDiff( 7657 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7658 CGF.EmitCastToVoidPtr(LB.getPointer())); 7659 break; 7660 } 7661 } 7662 BasePointers.push_back(BP.getPointer()); 7663 Pointers.push_back(LB.getPointer()); 7664 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7665 /*isSigned=*/true)); 7666 Types.push_back(Flags); 7667 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7668 } 7669 BasePointers.push_back(BP.getPointer()); 7670 Pointers.push_back(LB.getPointer()); 7671 Size = CGF.Builder.CreatePtrDiff( 7672 CGF.EmitCastToVoidPtr( 7673 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7674 CGF.EmitCastToVoidPtr(LB.getPointer())); 7675 Sizes.push_back( 7676 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7677 Types.push_back(Flags); 7678 break; 7679 } 7680 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7681 if (!IsMemberPointer) { 7682 BasePointers.push_back(BP.getPointer()); 7683 Pointers.push_back(LB.getPointer()); 7684 Sizes.push_back( 7685 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7686 7687 // We need to add a pointer flag for each map that comes from the 7688 // same expression except for the first one. We also need to signal 7689 // this map is the first one that relates with the current capture 7690 // (there is a set of entries for each capture). 
7691 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7692 MapType, MapModifiers, IsImplicit, 7693 !IsExpressionFirstInfo || RequiresReference, 7694 IsCaptureFirstInfo && !RequiresReference); 7695 7696 if (!IsExpressionFirstInfo) { 7697 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7698 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7699 if (IsPointer) 7700 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7701 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7702 7703 if (ShouldBeMemberOf) { 7704 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7705 // should be later updated with the correct value of MEMBER_OF. 7706 Flags |= OMP_MAP_MEMBER_OF; 7707 // From now on, all subsequent PTR_AND_OBJ entries should not be 7708 // marked as MEMBER_OF. 7709 ShouldBeMemberOf = false; 7710 } 7711 } 7712 7713 Types.push_back(Flags); 7714 } 7715 7716 // If we have encountered a member expression so far, keep track of the 7717 // mapped member. If the parent is "*this", then the value declaration 7718 // is nullptr. 7719 if (EncounteredME) { 7720 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7721 unsigned FieldIndex = FD->getFieldIndex(); 7722 7723 // Update info about the lowest and highest elements for this struct 7724 if (!PartialStruct.Base.isValid()) { 7725 PartialStruct.LowestElem = {FieldIndex, LB}; 7726 PartialStruct.HighestElem = {FieldIndex, LB}; 7727 PartialStruct.Base = BP; 7728 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7729 PartialStruct.LowestElem = {FieldIndex, LB}; 7730 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7731 PartialStruct.HighestElem = {FieldIndex, LB}; 7732 } 7733 } 7734 7735 // If we have a final array section, we are done with this expression. 7736 if (IsFinalArraySection) 7737 break; 7738 7739 // The pointer becomes the base for the next element. 
7740 if (Next != CE) 7741 BP = LB; 7742 7743 IsExpressionFirstInfo = false; 7744 IsCaptureFirstInfo = false; 7745 } 7746 } 7747 } 7748 7749 /// Return the adjusted map modifiers if the declaration a capture refers to 7750 /// appears in a first-private clause. This is expected to be used only with 7751 /// directives that start with 'target'. 7752 MappableExprsHandler::OpenMPOffloadMappingFlags 7753 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7754 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7755 7756 // A first private variable captured by reference will use only the 7757 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7758 // declaration is known as first-private in this handler. 7759 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7760 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7761 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7762 return MappableExprsHandler::OMP_MAP_ALWAYS | 7763 MappableExprsHandler::OMP_MAP_TO; 7764 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7765 return MappableExprsHandler::OMP_MAP_TO | 7766 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7767 return MappableExprsHandler::OMP_MAP_PRIVATE | 7768 MappableExprsHandler::OMP_MAP_TO; 7769 } 7770 return MappableExprsHandler::OMP_MAP_TO | 7771 MappableExprsHandler::OMP_MAP_FROM; 7772 } 7773 7774 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7775 // Rotate by getFlagMemberOffset() bits. 7776 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7777 << getFlagMemberOffset()); 7778 } 7779 7780 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7781 OpenMPOffloadMappingFlags MemberOfFlag) { 7782 // If the entry is PTR_AND_OBJ but has not been marked with the special 7783 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7784 // marked as MEMBER_OF. 
7785 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7786 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7787 return; 7788 7789 // Reset the placeholder value to prepare the flag for the assignment of the 7790 // proper MEMBER_OF value. 7791 Flags &= ~OMP_MAP_MEMBER_OF; 7792 Flags |= MemberOfFlag; 7793 } 7794 7795 void getPlainLayout(const CXXRecordDecl *RD, 7796 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7797 bool AsBase) const { 7798 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7799 7800 llvm::StructType *St = 7801 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7802 7803 unsigned NumElements = St->getNumElements(); 7804 llvm::SmallVector< 7805 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7806 RecordLayout(NumElements); 7807 7808 // Fill bases. 7809 for (const auto &I : RD->bases()) { 7810 if (I.isVirtual()) 7811 continue; 7812 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7813 // Ignore empty bases. 7814 if (Base->isEmpty() || CGF.getContext() 7815 .getASTRecordLayout(Base) 7816 .getNonVirtualSize() 7817 .isZero()) 7818 continue; 7819 7820 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7821 RecordLayout[FieldIndex] = Base; 7822 } 7823 // Fill in virtual bases. 7824 for (const auto &I : RD->vbases()) { 7825 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7826 // Ignore empty bases. 7827 if (Base->isEmpty()) 7828 continue; 7829 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7830 if (RecordLayout[FieldIndex]) 7831 continue; 7832 RecordLayout[FieldIndex] = Base; 7833 } 7834 // Fill in all the fields. 7835 assert(!RD->isUnion() && "Unexpected union."); 7836 for (const auto *Field : RD->fields()) { 7837 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7838 // will fill in later.) 
7839 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7840 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7841 RecordLayout[FieldIndex] = Field; 7842 } 7843 } 7844 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7845 &Data : RecordLayout) { 7846 if (Data.isNull()) 7847 continue; 7848 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7849 getPlainLayout(Base, Layout, /*AsBase=*/true); 7850 else 7851 Layout.push_back(Data.get<const FieldDecl *>()); 7852 } 7853 } 7854 7855 public: 7856 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7857 : CurDir(&Dir), CGF(CGF) { 7858 // Extract firstprivate clause information. 7859 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7860 for (const auto *D : C->varlists()) 7861 FirstPrivateDecls.try_emplace( 7862 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7863 // Extract device pointer clause information. 7864 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7865 for (auto L : C->component_lists()) 7866 DevPointersMap[L.first].push_back(L.second); 7867 } 7868 7869 /// Constructor for the declare mapper directive. 7870 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7871 : CurDir(&Dir), CGF(CGF) {} 7872 7873 /// Generate code for the combined entry if we have a partially mapped struct 7874 /// and take care of the mapping flags of the arguments corresponding to 7875 /// individual struct members. 
7876 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7877 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7878 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7879 const StructRangeInfoTy &PartialStruct) const { 7880 // Base is the base of the struct 7881 BasePointers.push_back(PartialStruct.Base.getPointer()); 7882 // Pointer is the address of the lowest element 7883 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7884 Pointers.push_back(LB); 7885 // Size is (addr of {highest+1} element) - (addr of lowest element) 7886 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7887 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7888 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7889 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7890 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7891 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7892 /*isSigned=*/false); 7893 Sizes.push_back(Size); 7894 // Map type is always TARGET_PARAM 7895 Types.push_back(OMP_MAP_TARGET_PARAM); 7896 // Remove TARGET_PARAM flag from the first element 7897 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7898 7899 // All other current entries will be MEMBER_OF the combined entry 7900 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7901 // 0xFFFF in the MEMBER_OF field). 7902 OpenMPOffloadMappingFlags MemberOfFlag = 7903 getMemberOfFlag(BasePointers.size() - 1); 7904 for (auto &M : CurTypes) 7905 setCorrectMemberOfFlag(M, MemberOfFlag); 7906 } 7907 7908 /// Generate all the base pointers, section pointers, sizes and map 7909 /// types for the extracted mappable expressions. Also, for each item that 7910 /// relates with a device pointer, a pair of the relevant declaration and 7911 /// index where it occurs is appended to the device pointers info array. 
void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                     MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                     MapFlagsArrayTy &Types) const {
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. Entries are keyed by the canonical declaration; a null
  // declaration is kept as the null key.
  auto &&InfoGen = [&Info](
      const ValueDecl *D,
      OMPClauseMappableExprCommon::MappableExprComponentListRef L,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit) {
    const ValueDecl *VD =
        D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
    Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                          IsImplicit);
  };

  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // Collect the component lists of map, to and from clauses. The to/from
  // clauses are recorded with map types 'to'/'from' and no modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }
  for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }
  for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
    for (const auto L : C->component_lists()) {
      InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user fault if that was not mapped before. If there is
  // no map information and the pointer is a struct member, then we defer the
  // emission of that entry until the whole struct has been processed.
  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;

  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
    for (const auto L : C->component_lists()) {
      assert(!L.second.empty() && "Not expecting empty list of components!");
      const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = L.second.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto CI = std::find_if(
            It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        if (CI != It->second.end()) {
          CI->ReturnDevicePointer = true;
          continue;
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
        DeferredInfo[nullptr].emplace_back(IE, VD);
      } else {
        // Not a struct member: emit the zero-size entry right away, using
        // the loaded pointer value as both base and section pointer.
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        BasePointers.emplace_back(Ptr, VD);
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
      }
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary versions of arrays
    MapBaseValuesArrayTy CurBasePointers;
    MapValuesArrayTy CurPointers;
    MapValuesArrayTy CurSizes;
    MapFlagsArrayTy CurTypes;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");

      // Remember the current base pointer index.
      unsigned CurrentBasePointersIdx = CurBasePointers.size();
      generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                   CurBasePointers, CurPointers, CurSizes,
                                   CurTypes, PartialStruct,
                                   IsFirstComponentList, L.IsImplicit);

      // If this entry relates with a device pointer, set the relevant
      // declaration and add the 'return pointer' flag.
      if (L.ReturnDevicePointer) {
        assert(CurBasePointers.size() > CurrentBasePointersIdx &&
               "Unexpected number of mapped base pointers.");

        const ValueDecl *RelevantVD =
            L.Components.back().getAssociatedDeclaration();
        assert(RelevantVD &&
               "No relevant declaration related with device pointer??");

        CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
        CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
      }
      IsFirstComponentList = false;
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr.
    auto CI = DeferredInfo.find(M.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        // The base is the address of the member; the section pointer is the
        // pointer value loaded from it.
        llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
            this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
        CurBasePointers.emplace_back(BasePtr, L.VD);
        CurPointers.push_back(Ptr);
        CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
        // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
        // value MEMBER_OF=FFFF so that the entry is later updated with the
        // correct value of MEMBER_OF.
        CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                           OMP_MAP_MEMBER_OF);
      }
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                        PartialStruct);

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    Types.append(CurTypes.begin(), CurTypes.end());
  }
}

/// Generate all the base pointers, section pointers, sizes and map types for
/// the extracted map clauses of user-defined mapper.
///
/// Requires that this handler was constructed for a declare mapper
/// directive (asserted). Every clause of the mapper is treated as a map
/// clause, and the results are appended to the four output arrays.
void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. Entries are keyed by the canonical declaration; a null
  // declaration is kept as the null key.
  auto &&InfoGen = [&Info](
      const ValueDecl *D,
      OMPClauseMappableExprCommon::MappableExprComponentListRef L,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit) {
    const ValueDecl *VD =
        D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
    Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                          IsImplicit);
  };

  for (const auto *C : CurMapperDir->clauselists()) {
    // The cast asserts that each clause of the mapper is a map clause.
    const auto *MC = cast<OMPMapClause>(C);
    for (const auto L : MC->component_lists()) {
      InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
              /*ReturnDevicePointer=*/false, MC->isImplicit());
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary versions of arrays
    MapBaseValuesArrayTy CurBasePointers;
    MapValuesArrayTy CurPointers;
    MapValuesArrayTy CurSizes;
    MapFlagsArrayTy CurTypes;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");
      generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                   CurBasePointers, CurPointers, CurSizes,
                                   CurTypes, PartialStruct,
                                   IsFirstComponentList, L.IsImplicit);
      IsFirstComponentList = false;
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                        PartialStruct);

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    Types.append(CurTypes.begin(), CurTypes.end());
  }
}

/// Emit capture info for lambdas for variables captured by reference.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
    MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
    MapFlagsArrayTy &Types,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  // Nothing to do unless the (non-reference, canonical) type of VD is a
  // lambda class.
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  // Treat Arg as the address of the lambda object.
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  if (ThisCapture) {
    // Entry for the captured 'this'. LambdaPointers remembers, for the
    // field's address, the address of the lambda object it belongs to.
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                               VDLVal.getPointer(CGF));
    BasePointers.push_back(ThisLVal.getPointer(CGF));
    Pointers.push_back(ThisLValVal.getPointer(CGF));
    Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    const VarDecl *VD = LC.getCapturedVar();
    // Only by-reference captures and captures of pointer type get an entry.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the referenced type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(VarLVal.getPointer(CGF));
      Pointers.push_back(VarLValVal.getPointer(CGF));
      Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer captured by copy: the section pointer is the pointer value
      // loaded from the field, and the size is zero.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(VarLVal.getPointer(CGF));
      Pointers.push_back(VarRVal.getScalarVal());
      Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
}

/// Set correct indices for lambdas captures.
///
/// For every entry whose flags are exactly the combination emitted by
/// generateInfoForLambdaCaptures, find the closest preceding entry whose
/// section pointer is the address of the owning lambda (as recorded in
/// \p LambdaPointers) and pass that index to setCorrectMemberOfFlag.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    // Search backwards from I-1 down to 0 for the entry that maps the
    // lambda object itself.
    int TgtIdx = -1;
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // Encode the index of the parent lambda entry as a MEMBER_OF flag and
    // apply it to this capture's entry.
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg,
                            MapBaseValuesArrayTy &BasePointers,
                            MapValuesArrayTy &Pointers,
                            MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // Canonical declaration of the captured variable; null when the capture
  // is 'this'.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    BasePointers.emplace_back(Arg, VD);
    Pointers.push_back(Arg);
    Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
    return;
  }

  // One record per component list that refers to VD: the list itself, the
  // map type, the map-type modifiers and whether the clause is implicit.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    for (const auto L : C->decl_component_lists(VD)) {
      assert(L.first == VD &&
             "We got information for the wrong declaration??");
      assert(!L.second.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit());
    }
  }

  // Find overlapping elements (including the offset from the base element).
  // The map is keyed by the address of the base record inside
  // DeclComponentLists, which is not modified from here on.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ++Count;
    // Compare L only against the lists that come after it, so that each
    // pair is examined once.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
      // Walk both lists from the back in lock step while the components
      // agree in expression class and associated declaration.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head of
      // the components list.
      if (CI == CE || SI == SE) {
        assert((CI != CE || SI != SE) &&
               "Unexpected full match of the mapping components.");
        // The list that was exhausted is the base; the other, longer list
        // is recorded as overlapped by it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  // Layout is only needed, and only filled, when something overlaps.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    if (const auto *CRD =
            VD->getType().getCanonicalType()->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          // Skip the common part of both lists, walking from the back.
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // The lists diverge at two fields. Fields with the same parent
          // are ordered by field index; otherwise whichever of the two
          // appears first in Layout comes first.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          // NOTE(review): It is dereferenced without an end() check, so this
          // relies on at least one of the two fields being in Layout.
          const auto It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // First emit the component lists that have overlapped elements, passing
  // the overlapped lists along. Each of them is treated as a first
  // component list.
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    bool IsFirstComponentList = true;
    generateInfoForComponentList(MapType, MapModifiers, Components,
                                 BasePointers, Pointers, Sizes, Types,
                                 PartialStruct, IsFirstComponentList,
                                 IsImplicit, OverlappedComponents);
  }
  // Go through other elements without overlapped elements.
  // The flag starts out true only if nothing was emitted above, and it is
  // cleared after the first list is visited, whether or not that list was
  // emitted here.
  bool IsFirstComponentList = OverlappedData.empty();
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit);
    IsFirstComponentList = false;
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated with the declare target link variables.
8432 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8433 MapValuesArrayTy &Pointers, 8434 MapValuesArrayTy &Sizes, 8435 MapFlagsArrayTy &Types) const { 8436 assert(CurDir.is<const OMPExecutableDirective *>() && 8437 "Expect a executable directive"); 8438 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8439 // Map other list items in the map clause which are not captured variables 8440 // but "declare target link" global variables. 8441 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8442 for (const auto L : C->component_lists()) { 8443 if (!L.first) 8444 continue; 8445 const auto *VD = dyn_cast<VarDecl>(L.first); 8446 if (!VD) 8447 continue; 8448 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8449 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8450 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8451 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8452 continue; 8453 StructRangeInfoTy PartialStruct; 8454 generateInfoForComponentList( 8455 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8456 Pointers, Sizes, Types, PartialStruct, 8457 /*IsFirstComponentList=*/true, C->isImplicit()); 8458 assert(!PartialStruct.Base.isValid() && 8459 "No partial structs for declare target link expected."); 8460 } 8461 } 8462 } 8463 8464 /// Generate the default map information for a given capture \a CI, 8465 /// record field declaration \a RI and captured value \a CV. 8466 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8467 const FieldDecl &RI, llvm::Value *CV, 8468 MapBaseValuesArrayTy &CurBasePointers, 8469 MapValuesArrayTy &CurPointers, 8470 MapValuesArrayTy &CurSizes, 8471 MapFlagsArrayTy &CurMapTypes) const { 8472 bool IsImplicit = true; 8473 // Do the default mapping. 
8474 if (CI.capturesThis()) { 8475 CurBasePointers.push_back(CV); 8476 CurPointers.push_back(CV); 8477 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8478 CurSizes.push_back( 8479 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8480 CGF.Int64Ty, /*isSigned=*/true)); 8481 // Default map type. 8482 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8483 } else if (CI.capturesVariableByCopy()) { 8484 CurBasePointers.push_back(CV); 8485 CurPointers.push_back(CV); 8486 if (!RI.getType()->isAnyPointerType()) { 8487 // We have to signal to the runtime captures passed by value that are 8488 // not pointers. 8489 CurMapTypes.push_back(OMP_MAP_LITERAL); 8490 CurSizes.push_back(CGF.Builder.CreateIntCast( 8491 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8492 } else { 8493 // Pointers are implicitly mapped with a zero size and no flags 8494 // (other than first map that is added for all implicit maps). 8495 CurMapTypes.push_back(OMP_MAP_NONE); 8496 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8497 } 8498 const VarDecl *VD = CI.getCapturedVar(); 8499 auto I = FirstPrivateDecls.find(VD); 8500 if (I != FirstPrivateDecls.end()) 8501 IsImplicit = I->getSecond(); 8502 } else { 8503 assert(CI.capturesVariable() && "Expected captured reference."); 8504 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8505 QualType ElementType = PtrTy->getPointeeType(); 8506 CurSizes.push_back(CGF.Builder.CreateIntCast( 8507 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8508 // The default map type for a scalar/complex type is 'to' because by 8509 // default the value doesn't have to be retrieved. For an aggregate 8510 // type, the default is 'tofrom'. 
8511 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8512 const VarDecl *VD = CI.getCapturedVar(); 8513 auto I = FirstPrivateDecls.find(VD); 8514 if (I != FirstPrivateDecls.end() && 8515 VD->getType().isConstant(CGF.getContext())) { 8516 llvm::Constant *Addr = 8517 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8518 // Copy the value of the original variable to the new global copy. 8519 CGF.Builder.CreateMemCpy( 8520 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8521 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8522 CurSizes.back(), /*IsVolatile=*/false); 8523 // Use new global variable as the base pointers. 8524 CurBasePointers.push_back(Addr); 8525 CurPointers.push_back(Addr); 8526 } else { 8527 CurBasePointers.push_back(CV); 8528 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8529 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8530 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8531 AlignmentSource::Decl)); 8532 CurPointers.push_back(PtrAddr.getPointer()); 8533 } else { 8534 CurPointers.push_back(CV); 8535 } 8536 } 8537 if (I != FirstPrivateDecls.end()) 8538 IsImplicit = I->getSecond(); 8539 } 8540 // Every default map produces a single argument which is a target parameter. 8541 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8542 8543 // Add flag stating this is an implicit map. 8544 if (IsImplicit) 8545 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8546 } 8547 }; 8548 } // anonymous namespace 8549 8550 /// Emit the arrays used to pass the captures and map information to the 8551 /// offloading runtime library. If there is no map or capture information, 8552 /// return nullptr by reference. 
8553 static void 8554 emitOffloadingArrays(CodeGenFunction &CGF, 8555 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8556 MappableExprsHandler::MapValuesArrayTy &Pointers, 8557 MappableExprsHandler::MapValuesArrayTy &Sizes, 8558 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8559 CGOpenMPRuntime::TargetDataInfo &Info) { 8560 CodeGenModule &CGM = CGF.CGM; 8561 ASTContext &Ctx = CGF.getContext(); 8562 8563 // Reset the array information. 8564 Info.clearArrayInfo(); 8565 Info.NumberOfPtrs = BasePointers.size(); 8566 8567 if (Info.NumberOfPtrs) { 8568 // Detect if we have any capture size requiring runtime evaluation of the 8569 // size so that a constant array could be eventually used. 8570 bool hasRuntimeEvaluationCaptureSize = false; 8571 for (llvm::Value *S : Sizes) 8572 if (!isa<llvm::Constant>(S)) { 8573 hasRuntimeEvaluationCaptureSize = true; 8574 break; 8575 } 8576 8577 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8578 QualType PointerArrayType = Ctx.getConstantArrayType( 8579 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8580 /*IndexTypeQuals=*/0); 8581 8582 Info.BasePointersArray = 8583 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8584 Info.PointersArray = 8585 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8586 8587 // If we don't have any VLA types or other types that require runtime 8588 // evaluation, we can use a constant array for the map sizes, otherwise we 8589 // need to fill up the arrays as we do for the pointers. 
8590 QualType Int64Ty = 8591 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8592 if (hasRuntimeEvaluationCaptureSize) { 8593 QualType SizeArrayType = Ctx.getConstantArrayType( 8594 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8595 /*IndexTypeQuals=*/0); 8596 Info.SizesArray = 8597 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8598 } else { 8599 // We expect all the sizes to be constant, so we collect them to create 8600 // a constant array. 8601 SmallVector<llvm::Constant *, 16> ConstSizes; 8602 for (llvm::Value *S : Sizes) 8603 ConstSizes.push_back(cast<llvm::Constant>(S)); 8604 8605 auto *SizesArrayInit = llvm::ConstantArray::get( 8606 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8607 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8608 auto *SizesArrayGbl = new llvm::GlobalVariable( 8609 CGM.getModule(), SizesArrayInit->getType(), 8610 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8611 SizesArrayInit, Name); 8612 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8613 Info.SizesArray = SizesArrayGbl; 8614 } 8615 8616 // The map types are always constant so we don't need to generate code to 8617 // fill arrays. Instead, we create an array constant. 
8618 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8619 llvm::copy(MapTypes, Mapping.begin()); 8620 llvm::Constant *MapTypesArrayInit = 8621 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8622 std::string MaptypesName = 8623 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8624 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8625 CGM.getModule(), MapTypesArrayInit->getType(), 8626 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8627 MapTypesArrayInit, MaptypesName); 8628 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8629 Info.MapTypesArray = MapTypesArrayGbl; 8630 8631 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8632 llvm::Value *BPVal = *BasePointers[I]; 8633 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8634 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8635 Info.BasePointersArray, 0, I); 8636 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8637 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8638 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8639 CGF.Builder.CreateStore(BPVal, BPAddr); 8640 8641 if (Info.requiresDevicePointerInfo()) 8642 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8643 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8644 8645 llvm::Value *PVal = Pointers[I]; 8646 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8647 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8648 Info.PointersArray, 0, I); 8649 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8650 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8651 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8652 CGF.Builder.CreateStore(PVal, PAddr); 8653 8654 if (hasRuntimeEvaluationCaptureSize) { 8655 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8656 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8657 Info.SizesArray, 8658 /*Idx0=*/0, 8659 /*Idx1=*/I); 8660 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8661 CGF.Builder.CreateStore( 8662 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8663 SAddr); 8664 } 8665 } 8666 } 8667 } 8668 8669 /// Emit the arguments to be passed to the runtime library based on the 8670 /// arrays of pointers, sizes and map types. 8671 static void emitOffloadingArraysArgument( 8672 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8673 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8674 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8675 CodeGenModule &CGM = CGF.CGM; 8676 if (Info.NumberOfPtrs) { 8677 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8678 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8679 Info.BasePointersArray, 8680 /*Idx0=*/0, /*Idx1=*/0); 8681 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8682 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8683 Info.PointersArray, 8684 /*Idx0=*/0, 8685 /*Idx1=*/0); 8686 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8687 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8688 /*Idx0=*/0, /*Idx1=*/0); 8689 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8690 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8691 Info.MapTypesArray, 8692 /*Idx0=*/0, 8693 /*Idx1=*/0); 8694 } else { 8695 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8696 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8697 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8698 MapTypesArrayArg = 8699 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8700 } 8701 } 8702 8703 /// Check for inner distribute directive. 
8704 static const OMPExecutableDirective * 8705 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8706 const auto *CS = D.getInnermostCapturedStmt(); 8707 const auto *Body = 8708 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8709 const Stmt *ChildStmt = 8710 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8711 8712 if (const auto *NestedDir = 8713 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8714 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8715 switch (D.getDirectiveKind()) { 8716 case OMPD_target: 8717 if (isOpenMPDistributeDirective(DKind)) 8718 return NestedDir; 8719 if (DKind == OMPD_teams) { 8720 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8721 /*IgnoreCaptured=*/true); 8722 if (!Body) 8723 return nullptr; 8724 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8725 if (const auto *NND = 8726 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8727 DKind = NND->getDirectiveKind(); 8728 if (isOpenMPDistributeDirective(DKind)) 8729 return NND; 8730 } 8731 } 8732 return nullptr; 8733 case OMPD_target_teams: 8734 if (isOpenMPDistributeDirective(DKind)) 8735 return NestedDir; 8736 return nullptr; 8737 case OMPD_target_parallel: 8738 case OMPD_target_simd: 8739 case OMPD_target_parallel_for: 8740 case OMPD_target_parallel_for_simd: 8741 return nullptr; 8742 case OMPD_target_teams_distribute: 8743 case OMPD_target_teams_distribute_simd: 8744 case OMPD_target_teams_distribute_parallel_for: 8745 case OMPD_target_teams_distribute_parallel_for_simd: 8746 case OMPD_parallel: 8747 case OMPD_for: 8748 case OMPD_parallel_for: 8749 case OMPD_parallel_master: 8750 case OMPD_parallel_sections: 8751 case OMPD_for_simd: 8752 case OMPD_parallel_for_simd: 8753 case OMPD_cancel: 8754 case OMPD_cancellation_point: 8755 case OMPD_ordered: 8756 case OMPD_threadprivate: 8757 case OMPD_allocate: 8758 case OMPD_task: 8759 case OMPD_simd: 8760 case OMPD_sections: 8761 
case OMPD_section: 8762 case OMPD_single: 8763 case OMPD_master: 8764 case OMPD_critical: 8765 case OMPD_taskyield: 8766 case OMPD_barrier: 8767 case OMPD_taskwait: 8768 case OMPD_taskgroup: 8769 case OMPD_atomic: 8770 case OMPD_flush: 8771 case OMPD_teams: 8772 case OMPD_target_data: 8773 case OMPD_target_exit_data: 8774 case OMPD_target_enter_data: 8775 case OMPD_distribute: 8776 case OMPD_distribute_simd: 8777 case OMPD_distribute_parallel_for: 8778 case OMPD_distribute_parallel_for_simd: 8779 case OMPD_teams_distribute: 8780 case OMPD_teams_distribute_simd: 8781 case OMPD_teams_distribute_parallel_for: 8782 case OMPD_teams_distribute_parallel_for_simd: 8783 case OMPD_target_update: 8784 case OMPD_declare_simd: 8785 case OMPD_declare_variant: 8786 case OMPD_declare_target: 8787 case OMPD_end_declare_target: 8788 case OMPD_declare_reduction: 8789 case OMPD_declare_mapper: 8790 case OMPD_taskloop: 8791 case OMPD_taskloop_simd: 8792 case OMPD_master_taskloop: 8793 case OMPD_master_taskloop_simd: 8794 case OMPD_parallel_master_taskloop: 8795 case OMPD_parallel_master_taskloop_simd: 8796 case OMPD_requires: 8797 case OMPD_unknown: 8798 llvm_unreachable("Unexpected directive."); 8799 } 8800 } 8801 8802 return nullptr; 8803 } 8804 8805 /// Emit the user-defined mapper function. The code generation follows the 8806 /// pattern in the example below. 8807 /// \code 8808 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8809 /// void *base, void *begin, 8810 /// int64_t size, int64_t type) { 8811 /// // Allocate space for an array section first. 8812 /// if (size > 1 && !maptype.IsDelete) 8813 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8814 /// size*sizeof(Ty), clearToFrom(type)); 8815 /// // Map members. 
8816 /// for (unsigned i = 0; i < size; i++) { 8817 /// // For each component specified by this mapper: 8818 /// for (auto c : all_components) { 8819 /// if (c.hasMapper()) 8820 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8821 /// c.arg_type); 8822 /// else 8823 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8824 /// c.arg_begin, c.arg_size, c.arg_type); 8825 /// } 8826 /// } 8827 /// // Delete the array section. 8828 /// if (size > 1 && maptype.IsDelete) 8829 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8830 /// size*sizeof(Ty), clearToFrom(type)); 8831 /// } 8832 /// \endcode 8833 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8834 CodeGenFunction *CGF) { 8835 if (UDMMap.count(D) > 0) 8836 return; 8837 ASTContext &C = CGM.getContext(); 8838 QualType Ty = D->getType(); 8839 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8840 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8841 auto *MapperVarDecl = 8842 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8843 SourceLocation Loc = D->getLocation(); 8844 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8845 8846 // Prepare mapper function arguments and attributes. 
8847 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8848 C.VoidPtrTy, ImplicitParamDecl::Other); 8849 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 8850 ImplicitParamDecl::Other); 8851 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8852 C.VoidPtrTy, ImplicitParamDecl::Other); 8853 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8854 ImplicitParamDecl::Other); 8855 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8856 ImplicitParamDecl::Other); 8857 FunctionArgList Args; 8858 Args.push_back(&HandleArg); 8859 Args.push_back(&BaseArg); 8860 Args.push_back(&BeginArg); 8861 Args.push_back(&SizeArg); 8862 Args.push_back(&TypeArg); 8863 const CGFunctionInfo &FnInfo = 8864 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 8865 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 8866 SmallString<64> TyStr; 8867 llvm::raw_svector_ostream Out(TyStr); 8868 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 8869 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 8870 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 8871 Name, &CGM.getModule()); 8872 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 8873 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 8874 // Start the mapper function code generation. 8875 CodeGenFunction MapperCGF(CGM); 8876 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 8877 // Compute the starting and end addreses of array elements. 
8878 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 8879 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 8880 C.getPointerType(Int64Ty), Loc); 8881 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 8882 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 8883 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 8884 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 8885 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 8886 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 8887 C.getPointerType(Int64Ty), Loc); 8888 // Prepare common arguments for array initiation and deletion. 8889 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 8890 MapperCGF.GetAddrOfLocalVar(&HandleArg), 8891 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8892 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 8893 MapperCGF.GetAddrOfLocalVar(&BaseArg), 8894 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8895 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 8896 MapperCGF.GetAddrOfLocalVar(&BeginArg), 8897 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8898 8899 // Emit array initiation if this is an array section and \p MapType indicates 8900 // that memory allocation is required. 8901 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 8902 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 8903 ElementSize, HeadBB, /*IsInit=*/true); 8904 8905 // Emit a for loop to iterate through SizeArg of elements and map all of them. 8906 8907 // Emit the loop header block. 8908 MapperCGF.EmitBlock(HeadBB); 8909 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 8910 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 8911 // Evaluate whether the initial condition is satisfied. 
8912 llvm::Value *IsEmpty = 8913 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 8914 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 8915 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 8916 8917 // Emit the loop body block. 8918 MapperCGF.EmitBlock(BodyBB); 8919 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 8920 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 8921 PtrPHI->addIncoming(PtrBegin, EntryBB); 8922 Address PtrCurrent = 8923 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 8924 .getAlignment() 8925 .alignmentOfArrayElement(ElementSize)); 8926 // Privatize the declared variable of mapper to be the current array element. 8927 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 8928 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 8929 return MapperCGF 8930 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 8931 .getAddress(MapperCGF); 8932 }); 8933 (void)Scope.Privatize(); 8934 8935 // Get map clause information. Fill up the arrays with all mapped variables. 8936 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8937 MappableExprsHandler::MapValuesArrayTy Pointers; 8938 MappableExprsHandler::MapValuesArrayTy Sizes; 8939 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8940 MappableExprsHandler MEHandler(*D, MapperCGF); 8941 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 8942 8943 // Call the runtime API __tgt_mapper_num_components to get the number of 8944 // pre-existing components. 8945 llvm::Value *OffloadingArgs[] = {Handle}; 8946 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 8947 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 8948 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 8949 PreviousSize, 8950 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 8951 8952 // Fill up the runtime mapper handle for all components. 
8953 for (unsigned I = 0; I < BasePointers.size(); ++I) { 8954 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 8955 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8956 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 8957 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8958 llvm::Value *CurSizeArg = Sizes[I]; 8959 8960 // Extract the MEMBER_OF field from the map type. 8961 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 8962 MapperCGF.EmitBlock(MemberBB); 8963 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 8964 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 8965 OriMapType, 8966 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 8967 llvm::BasicBlock *MemberCombineBB = 8968 MapperCGF.createBasicBlock("omp.member.combine"); 8969 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 8970 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 8971 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 8972 // Add the number of pre-existing components to the MEMBER_OF field if it 8973 // is valid. 8974 MapperCGF.EmitBlock(MemberCombineBB); 8975 llvm::Value *CombinedMember = 8976 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 8977 // Do nothing if it is not a member of previous components. 8978 MapperCGF.EmitBlock(TypeBB); 8979 llvm::PHINode *MemberMapType = 8980 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 8981 MemberMapType->addIncoming(OriMapType, MemberBB); 8982 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 8983 8984 // Combine the map type inherited from user-defined mapper with that 8985 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 8986 // bits of the \a MapType, which is the input argument of the mapper 8987 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 8988 // bits of MemberMapType. 
8989 // [OpenMP 5.0], 1.2.6. map-type decay. 8990 // | alloc | to | from | tofrom | release | delete 8991 // ---------------------------------------------------------- 8992 // alloc | alloc | alloc | alloc | alloc | release | delete 8993 // to | alloc | to | alloc | to | release | delete 8994 // from | alloc | alloc | from | from | release | delete 8995 // tofrom | alloc | to | from | tofrom | release | delete 8996 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 8997 MapType, 8998 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 8999 MappableExprsHandler::OMP_MAP_FROM)); 9000 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9001 llvm::BasicBlock *AllocElseBB = 9002 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9003 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9004 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9005 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9006 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9007 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9008 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9009 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9010 MapperCGF.EmitBlock(AllocBB); 9011 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9012 MemberMapType, 9013 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9014 MappableExprsHandler::OMP_MAP_FROM))); 9015 MapperCGF.Builder.CreateBr(EndBB); 9016 MapperCGF.EmitBlock(AllocElseBB); 9017 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9018 LeftToFrom, 9019 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9020 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9021 // In case of to, clear OMP_MAP_FROM. 
9022 MapperCGF.EmitBlock(ToBB); 9023 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9024 MemberMapType, 9025 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9026 MapperCGF.Builder.CreateBr(EndBB); 9027 MapperCGF.EmitBlock(ToElseBB); 9028 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9029 LeftToFrom, 9030 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9031 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9032 // In case of from, clear OMP_MAP_TO. 9033 MapperCGF.EmitBlock(FromBB); 9034 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9035 MemberMapType, 9036 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9037 // In case of tofrom, do nothing. 9038 MapperCGF.EmitBlock(EndBB); 9039 llvm::PHINode *CurMapType = 9040 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9041 CurMapType->addIncoming(AllocMapType, AllocBB); 9042 CurMapType->addIncoming(ToMapType, ToBB); 9043 CurMapType->addIncoming(FromMapType, FromBB); 9044 CurMapType->addIncoming(MemberMapType, ToElseBB); 9045 9046 // TODO: call the corresponding mapper function if a user-defined mapper is 9047 // associated with this map clause. 9048 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9049 // data structure. 9050 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9051 CurSizeArg, CurMapType}; 9052 MapperCGF.EmitRuntimeCall( 9053 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9054 OffloadingArgs); 9055 } 9056 9057 // Update the pointer to point to the next element that needs to be mapped, 9058 // and check whether we have mapped all elements. 
9059 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9060 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9061 PtrPHI->addIncoming(PtrNext, BodyBB); 9062 llvm::Value *IsDone = 9063 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9064 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9065 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9066 9067 MapperCGF.EmitBlock(ExitBB); 9068 // Emit array deletion if this is an array section and \p MapType indicates 9069 // that deletion is required. 9070 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9071 ElementSize, DoneBB, /*IsInit=*/false); 9072 9073 // Emit the function exit block. 9074 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9075 MapperCGF.FinishFunction(); 9076 UDMMap.try_emplace(D, Fn); 9077 if (CGF) { 9078 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9079 Decls.second.push_back(D); 9080 } 9081 } 9082 9083 /// Emit the array initialization or deletion portion for user-defined mapper 9084 /// code generation. First, it evaluates whether an array section is mapped and 9085 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9086 /// true, and \a MapType indicates to not delete this array, array 9087 /// initialization code is generated. If \a IsInit is false, and \a MapType 9088 /// indicates to not this array, array deletion code is generated. 9089 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9090 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9091 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9092 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9093 StringRef Prefix = IsInit ? ".init" : ".del"; 9094 9095 // Evaluate if this is an array section. 
9096 llvm::BasicBlock *IsDeleteBB = 9097 MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); 9098 llvm::BasicBlock *BodyBB = 9099 MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); 9100 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9101 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9102 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9103 9104 // Evaluate if we are going to delete this section. 9105 MapperCGF.EmitBlock(IsDeleteBB); 9106 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9107 MapType, 9108 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9109 llvm::Value *DeleteCond; 9110 if (IsInit) { 9111 DeleteCond = MapperCGF.Builder.CreateIsNull( 9112 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9113 } else { 9114 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9115 DeleteBit, getName({"omp.array", Prefix, ".delete"})); 9116 } 9117 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9118 9119 MapperCGF.EmitBlock(BodyBB); 9120 // Get the array size by multiplying element size and element number (i.e., \p 9121 // Size). 9122 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9123 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9124 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9125 // memory allocation/deletion purpose only. 9126 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9127 MapType, 9128 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9129 MappableExprsHandler::OMP_MAP_FROM))); 9130 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9131 // data structure. 
9132 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9133 MapperCGF.EmitRuntimeCall( 9134 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9135 } 9136 9137 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9138 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9139 llvm::Value *DeviceID, 9140 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9141 const OMPLoopDirective &D)> 9142 SizeEmitter) { 9143 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9144 const OMPExecutableDirective *TD = &D; 9145 // Get nested teams distribute kind directive, if any. 9146 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9147 TD = getNestedDistributeDirective(CGM.getContext(), D); 9148 if (!TD) 9149 return; 9150 const auto *LD = cast<OMPLoopDirective>(TD); 9151 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9152 PrePostActionTy &) { 9153 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9154 llvm::Value *Args[] = {DeviceID, NumIterations}; 9155 CGF.EmitRuntimeCall( 9156 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9157 } 9158 }; 9159 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9160 } 9161 9162 void CGOpenMPRuntime::emitTargetCall( 9163 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9164 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9165 const Expr *Device, 9166 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9167 const OMPLoopDirective &D)> 9168 SizeEmitter) { 9169 if (!CGF.HaveInsertPoint()) 9170 return; 9171 9172 assert(OutlinedFn && "Invalid outlined function!"); 9173 9174 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9175 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9176 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9177 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9178 PrePostActionTy &) { 9179 
/// Emit the host-side code that launches a target region.
///
/// When \p OutlinedFnID is set, the offloading arrays are filled, one of the
/// __tgt_target* runtime entry points is called, and the host version
/// (\p OutlinedFn) is called if the runtime reports a non-zero result. When
/// \p OutlinedFnID is null, only the call to the host version is emitted. If
/// \p IfCond is given, the choice between the two paths is made through
/// emitIfClause.
///
/// \param CGF          Function in which the code is emitted; nothing is
///                     emitted if it has no insert point.
/// \param D            The target directive being lowered.
/// \param OutlinedFn   Host version of the target region; must be non-null.
/// \param OutlinedFnID Identifier of the target region passed to the runtime,
///                     or null when offloading is not supported.
/// \param IfCond       Optional condition of the 'if' clause.
/// \param Device       Optional device expression; OMP_DEVICEID_UNDEF is used
///                     when absent.
/// \param SizeEmitter  Callback forwarded to emitTargetNumIterationsCall.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    const Expr *Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause means the target region is emitted through
  // EmitOMPTargetTaskBasedDirective below; in that case the captured
  // variables are regenerated before each host call.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Generate the captured variables once up front, inside an inlined region.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled by TargetThenGen and read by
  // ThenGen; both lambdas capture them by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      // No teams information: use the __tgt_target entry points, which take
      // no team/thread counts.
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    // A non-zero return value from the runtime call is treated as failure.
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // With an outer task, regenerate the captured variables at this point
    // before calling the host version.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: collect map information for every capture, emit the
  // offloading arrays, publish them through InputInfo/MapTypesArray, then run
  // ThenGen either inside a target task or as an inlined region.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk captures, captured-record fields and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    // The fields of Info are passed as the output references, so each one is
    // overwritten in place with the argument value derived from it.
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen either inside a target task (with an empty
  // OMPTargetDataInfo) or as an inlined region.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9461 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9462 ParentName, Line)) 9463 return; 9464 9465 switch (E.getDirectiveKind()) { 9466 case OMPD_target: 9467 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9468 cast<OMPTargetDirective>(E)); 9469 break; 9470 case OMPD_target_parallel: 9471 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9472 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9473 break; 9474 case OMPD_target_teams: 9475 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9476 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9477 break; 9478 case OMPD_target_teams_distribute: 9479 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9480 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9481 break; 9482 case OMPD_target_teams_distribute_simd: 9483 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9484 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9485 break; 9486 case OMPD_target_parallel_for: 9487 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9488 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9489 break; 9490 case OMPD_target_parallel_for_simd: 9491 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9492 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9493 break; 9494 case OMPD_target_simd: 9495 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9496 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9497 break; 9498 case OMPD_target_teams_distribute_parallel_for: 9499 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9500 CGM, ParentName, 9501 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9502 break; 9503 case OMPD_target_teams_distribute_parallel_for_simd: 9504 CodeGenFunction:: 9505 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9506 CGM, ParentName, 9507 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9508 break; 9509 case OMPD_parallel: 
9510 case OMPD_for: 9511 case OMPD_parallel_for: 9512 case OMPD_parallel_master: 9513 case OMPD_parallel_sections: 9514 case OMPD_for_simd: 9515 case OMPD_parallel_for_simd: 9516 case OMPD_cancel: 9517 case OMPD_cancellation_point: 9518 case OMPD_ordered: 9519 case OMPD_threadprivate: 9520 case OMPD_allocate: 9521 case OMPD_task: 9522 case OMPD_simd: 9523 case OMPD_sections: 9524 case OMPD_section: 9525 case OMPD_single: 9526 case OMPD_master: 9527 case OMPD_critical: 9528 case OMPD_taskyield: 9529 case OMPD_barrier: 9530 case OMPD_taskwait: 9531 case OMPD_taskgroup: 9532 case OMPD_atomic: 9533 case OMPD_flush: 9534 case OMPD_teams: 9535 case OMPD_target_data: 9536 case OMPD_target_exit_data: 9537 case OMPD_target_enter_data: 9538 case OMPD_distribute: 9539 case OMPD_distribute_simd: 9540 case OMPD_distribute_parallel_for: 9541 case OMPD_distribute_parallel_for_simd: 9542 case OMPD_teams_distribute: 9543 case OMPD_teams_distribute_simd: 9544 case OMPD_teams_distribute_parallel_for: 9545 case OMPD_teams_distribute_parallel_for_simd: 9546 case OMPD_target_update: 9547 case OMPD_declare_simd: 9548 case OMPD_declare_variant: 9549 case OMPD_declare_target: 9550 case OMPD_end_declare_target: 9551 case OMPD_declare_reduction: 9552 case OMPD_declare_mapper: 9553 case OMPD_taskloop: 9554 case OMPD_taskloop_simd: 9555 case OMPD_master_taskloop: 9556 case OMPD_master_taskloop_simd: 9557 case OMPD_parallel_master_taskloop: 9558 case OMPD_parallel_master_taskloop_simd: 9559 case OMPD_requires: 9560 case OMPD_unknown: 9561 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9562 } 9563 return; 9564 } 9565 9566 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9567 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9568 return; 9569 9570 scanForTargetRegionsFunctions( 9571 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9572 return; 9573 } 9574 9575 // If this is a lambda function, look into its body. 
9576 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9577 S = L->getBody(); 9578 9579 // Keep looking for target regions recursively. 9580 for (const Stmt *II : S->children()) 9581 scanForTargetRegionsFunctions(II, ParentName); 9582 } 9583 9584 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9585 // If emitting code for the host, we do not process FD here. Instead we do 9586 // the normal code generation. 9587 if (!CGM.getLangOpts().OpenMPIsDevice) { 9588 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9589 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9590 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9591 // Do not emit device_type(nohost) functions for the host. 9592 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9593 return true; 9594 } 9595 return false; 9596 } 9597 9598 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9599 // Try to detect target regions in the function. 9600 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9601 StringRef Name = CGM.getMangledName(GD); 9602 scanForTargetRegionsFunctions(FD->getBody(), Name); 9603 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9604 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9605 // Do not emit device_type(nohost) functions for the host. 9606 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9607 return true; 9608 } 9609 9610 // Do not to emit function if it is not marked as declare target. 9611 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9612 AlreadyEmittedTargetDecls.count(VD) == 0; 9613 } 9614 9615 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9616 if (!CGM.getLangOpts().OpenMPIsDevice) 9617 return false; 9618 9619 // Check if there are Ctors/Dtors in this declaration and look for target 9620 // regions in it. We use the complete variant to produce the kernel name 9621 // mangling. 
9622 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9623 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9624 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9625 StringRef ParentName = 9626 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9627 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9628 } 9629 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9630 StringRef ParentName = 9631 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9632 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9633 } 9634 } 9635 9636 // Do not to emit variable if it is not marked as declare target. 9637 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9638 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9639 cast<VarDecl>(GD.getDecl())); 9640 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9641 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9642 HasRequiresUnifiedSharedMemory)) { 9643 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9644 return true; 9645 } 9646 return false; 9647 } 9648 9649 llvm::Constant * 9650 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9651 const VarDecl *VD) { 9652 assert(VD->getType().isConstant(CGM.getContext()) && 9653 "Expected constant variable."); 9654 StringRef VarName; 9655 llvm::Constant *Addr; 9656 llvm::GlobalValue::LinkageTypes Linkage; 9657 QualType Ty = VD->getType(); 9658 SmallString<128> Buffer; 9659 { 9660 unsigned DeviceID; 9661 unsigned FileID; 9662 unsigned Line; 9663 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9664 FileID, Line); 9665 llvm::raw_svector_ostream OS(Buffer); 9666 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9667 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9668 VarName = OS.str(); 9669 } 9670 Linkage = llvm::GlobalValue::InternalLinkage; 9671 Addr = 9672 
getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9673 getDefaultFirstprivateAddressSpace()); 9674 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9675 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9676 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9677 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9678 VarName, Addr, VarSize, 9679 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9680 return Addr; 9681 } 9682 9683 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9684 llvm::Constant *Addr) { 9685 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9686 !CGM.getLangOpts().OpenMPIsDevice) 9687 return; 9688 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9689 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9690 if (!Res) { 9691 if (CGM.getLangOpts().OpenMPIsDevice) { 9692 // Register non-target variables being emitted in device code (debug info 9693 // may cause this). 9694 StringRef VarName = CGM.getMangledName(VD); 9695 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9696 } 9697 return; 9698 } 9699 // Register declare target variables. 9700 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9701 StringRef VarName; 9702 CharUnits VarSize; 9703 llvm::GlobalValue::LinkageTypes Linkage; 9704 9705 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9706 !HasRequiresUnifiedSharedMemory) { 9707 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9708 VarName = CGM.getMangledName(VD); 9709 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9710 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9711 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9712 } else { 9713 VarSize = CharUnits::Zero(); 9714 } 9715 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9716 // Temp solution to prevent optimizations of the internal variables. 
9717 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9718 std::string RefName = getName({VarName, "ref"}); 9719 if (!CGM.GetGlobalValue(RefName)) { 9720 llvm::Constant *AddrRef = 9721 getOrCreateInternalVariable(Addr->getType(), RefName); 9722 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9723 GVAddrRef->setConstant(/*Val=*/true); 9724 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9725 GVAddrRef->setInitializer(Addr); 9726 CGM.addCompilerUsedGlobal(GVAddrRef); 9727 } 9728 } 9729 } else { 9730 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9731 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9732 HasRequiresUnifiedSharedMemory)) && 9733 "Declare target attribute must link or to with unified memory."); 9734 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9735 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9736 else 9737 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9738 9739 if (CGM.getLangOpts().OpenMPIsDevice) { 9740 VarName = Addr->getName(); 9741 Addr = nullptr; 9742 } else { 9743 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9744 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9745 } 9746 VarSize = CGM.getPointerSize(); 9747 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9748 } 9749 9750 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9751 VarName, Addr, VarSize, Flags, Linkage); 9752 } 9753 9754 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9755 if (isa<FunctionDecl>(GD.getDecl()) || 9756 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9757 return emitTargetFunctions(GD); 9758 9759 return emitTargetGlobalVariable(GD); 9760 } 9761 9762 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9763 for (const VarDecl *VD : DeferredGlobalVariables) { 9764 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9765 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9766 if (!Res) 9767 continue; 9768 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9769 !HasRequiresUnifiedSharedMemory) { 9770 CGM.EmitGlobal(VD); 9771 } else { 9772 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9773 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9774 HasRequiresUnifiedSharedMemory)) && 9775 "Expected link clause or to clause with unified memory."); 9776 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9777 } 9778 } 9779 } 9780 9781 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9782 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9783 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9784 " Expected target-based directive."); 9785 } 9786 9787 void CGOpenMPRuntime::checkArchForUnifiedAddressing( 9788 const OMPRequiresDecl *D) { 9789 for (const OMPClause *Clause : D->clauselists()) { 9790 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9791 HasRequiresUnifiedSharedMemory = true; 9792 break; 9793 } 9794 } 9795 } 9796 9797 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9798 LangAS &AS) { 9799 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9800 return false; 9801 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9802 switch(A->getAllocatorType()) { 9803 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9804 // Not supported, fallback to the default mem space. 
9805 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9806 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9807 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9808 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9809 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9810 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9811 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9812 AS = LangAS::Default; 9813 return true; 9814 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9815 llvm_unreachable("Expected predefined allocator for the variables with the " 9816 "static storage."); 9817 } 9818 return false; 9819 } 9820 9821 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9822 return HasRequiresUnifiedSharedMemory; 9823 } 9824 9825 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9826 CodeGenModule &CGM) 9827 : CGM(CGM) { 9828 if (CGM.getLangOpts().OpenMPIsDevice) { 9829 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9830 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9831 } 9832 } 9833 9834 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9835 if (CGM.getLangOpts().OpenMPIsDevice) 9836 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9837 } 9838 9839 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9840 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9841 return true; 9842 9843 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9844 // Do not to emit function if it is marked as declare target as it was already 9845 // emitted. 
9846 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9847 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 9848 if (auto *F = dyn_cast_or_null<llvm::Function>( 9849 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 9850 return !F->isDeclaration(); 9851 return false; 9852 } 9853 return true; 9854 } 9855 9856 return !AlreadyEmittedTargetDecls.insert(D).second; 9857 } 9858 9859 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9860 // If we don't have entries or if we are emitting code for the device, we 9861 // don't need to do anything. 9862 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9863 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9864 (OffloadEntriesInfoManager.empty() && 9865 !HasEmittedDeclareTargetRegion && 9866 !HasEmittedTargetRegion)) 9867 return nullptr; 9868 9869 // Create and register the function that handles the requires directives. 9870 ASTContext &C = CGM.getContext(); 9871 9872 llvm::Function *RequiresRegFn; 9873 { 9874 CodeGenFunction CGF(CGM); 9875 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 9876 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 9877 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 9878 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 9879 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 9880 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 9881 // TODO: check for other requires clauses. 9882 // The requires directive takes effect only when a target region is 9883 // present in the compilation unit. Otherwise it is ignored and not 9884 // passed to the runtime. This avoids the runtime from throwing an error 9885 // for mismatching requires clauses across compilation units that don't 9886 // contain at least 1 target region. 
9887 assert((HasEmittedTargetRegion || 9888 HasEmittedDeclareTargetRegion || 9889 !OffloadEntriesInfoManager.empty()) && 9890 "Target or declare target region expected."); 9891 if (HasRequiresUnifiedSharedMemory) 9892 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 9893 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 9894 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 9895 CGF.FinishFunction(); 9896 } 9897 return RequiresRegFn; 9898 } 9899 9900 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9901 const OMPExecutableDirective &D, 9902 SourceLocation Loc, 9903 llvm::Function *OutlinedFn, 9904 ArrayRef<llvm::Value *> CapturedVars) { 9905 if (!CGF.HaveInsertPoint()) 9906 return; 9907 9908 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9909 CodeGenFunction::RunCleanupsScope Scope(CGF); 9910 9911 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9912 llvm::Value *Args[] = { 9913 RTLoc, 9914 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9915 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9916 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9917 RealArgs.append(std::begin(Args), std::end(Args)); 9918 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9919 9920 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9921 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9922 } 9923 9924 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9925 const Expr *NumTeams, 9926 const Expr *ThreadLimit, 9927 SourceLocation Loc) { 9928 if (!CGF.HaveInsertPoint()) 9929 return; 9930 9931 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9932 9933 llvm::Value *NumTeamsVal = 9934 NumTeams 9935 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9936 CGF.CGM.Int32Ty, /* isSigned = */ true) 9937 : CGF.Builder.getInt32(0); 9938 9939 llvm::Value *ThreadLimitVal = 9940 ThreadLimit 9941 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 9942 CGF.CGM.Int32Ty, /* isSigned = */ true) 9943 : CGF.Builder.getInt32(0); 9944 9945 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 9946 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 9947 ThreadLimitVal}; 9948 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 9949 PushNumTeamsArgs); 9950 } 9951 9952 void CGOpenMPRuntime::emitTargetDataCalls( 9953 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9954 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9955 if (!CGF.HaveInsertPoint()) 9956 return; 9957 9958 // Action used to replace the default codegen action and turn privatization 9959 // off. 9960 PrePostActionTy NoPrivAction; 9961 9962 // Generate the code for the opening of the data environment. Capture all the 9963 // arguments of the runtime call by reference because they are used in the 9964 // closing of the region. 9965 auto &&BeginThenGen = [this, &D, Device, &Info, 9966 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 9967 // Fill up the arrays with all the mapped variables. 9968 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9969 MappableExprsHandler::MapValuesArrayTy Pointers; 9970 MappableExprsHandler::MapValuesArrayTy Sizes; 9971 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9972 9973 // Get map clause information. 9974 MappableExprsHandler MCHandler(D, CGF); 9975 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9976 9977 // Fill up the arrays and create the arguments. 
9978 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9979 9980 llvm::Value *BasePointersArrayArg = nullptr; 9981 llvm::Value *PointersArrayArg = nullptr; 9982 llvm::Value *SizesArrayArg = nullptr; 9983 llvm::Value *MapTypesArrayArg = nullptr; 9984 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9985 SizesArrayArg, MapTypesArrayArg, Info); 9986 9987 // Emit device ID if any. 9988 llvm::Value *DeviceID = nullptr; 9989 if (Device) { 9990 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9991 CGF.Int64Ty, /*isSigned=*/true); 9992 } else { 9993 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9994 } 9995 9996 // Emit the number of elements in the offloading arrays. 9997 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9998 9999 llvm::Value *OffloadingArgs[] = { 10000 DeviceID, PointerNum, BasePointersArrayArg, 10001 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10002 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10003 OffloadingArgs); 10004 10005 // If device pointer privatization is required, emit the body of the region 10006 // here. It will have to be duplicated: with and without privatization. 10007 if (!Info.CaptureDeviceAddrMap.empty()) 10008 CodeGen(CGF); 10009 }; 10010 10011 // Generate code for the closing of the data region. 10012 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10013 PrePostActionTy &) { 10014 assert(Info.isValid() && "Invalid data environment closing arguments."); 10015 10016 llvm::Value *BasePointersArrayArg = nullptr; 10017 llvm::Value *PointersArrayArg = nullptr; 10018 llvm::Value *SizesArrayArg = nullptr; 10019 llvm::Value *MapTypesArrayArg = nullptr; 10020 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10021 SizesArrayArg, MapTypesArrayArg, Info); 10022 10023 // Emit device ID if any. 
10024 llvm::Value *DeviceID = nullptr; 10025 if (Device) { 10026 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10027 CGF.Int64Ty, /*isSigned=*/true); 10028 } else { 10029 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10030 } 10031 10032 // Emit the number of elements in the offloading arrays. 10033 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10034 10035 llvm::Value *OffloadingArgs[] = { 10036 DeviceID, PointerNum, BasePointersArrayArg, 10037 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10038 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10039 OffloadingArgs); 10040 }; 10041 10042 // If we need device pointer privatization, we need to emit the body of the 10043 // region with no privatization in the 'else' branch of the conditional. 10044 // Otherwise, we don't have to do anything. 10045 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10046 PrePostActionTy &) { 10047 if (!Info.CaptureDeviceAddrMap.empty()) { 10048 CodeGen.setAction(NoPrivAction); 10049 CodeGen(CGF); 10050 } 10051 }; 10052 10053 // We don't have to do anything to close the region if the if clause evaluates 10054 // to false. 10055 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10056 10057 if (IfCond) { 10058 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10059 } else { 10060 RegionCodeGenTy RCG(BeginThenGen); 10061 RCG(CGF); 10062 } 10063 10064 // If we don't require privatization of device pointers, we emit the body in 10065 // between the runtime calls. This avoids duplicating the body code. 
10066 if (Info.CaptureDeviceAddrMap.empty()) { 10067 CodeGen.setAction(NoPrivAction); 10068 CodeGen(CGF); 10069 } 10070 10071 if (IfCond) { 10072 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10073 } else { 10074 RegionCodeGenTy RCG(EndThenGen); 10075 RCG(CGF); 10076 } 10077 } 10078 10079 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10080 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10081 const Expr *Device) { 10082 if (!CGF.HaveInsertPoint()) 10083 return; 10084 10085 assert((isa<OMPTargetEnterDataDirective>(D) || 10086 isa<OMPTargetExitDataDirective>(D) || 10087 isa<OMPTargetUpdateDirective>(D)) && 10088 "Expecting either target enter, exit data, or update directives."); 10089 10090 CodeGenFunction::OMPTargetDataInfo InputInfo; 10091 llvm::Value *MapTypesArray = nullptr; 10092 // Generate the code for the opening of the data environment. 10093 auto &&ThenGen = [this, &D, Device, &InputInfo, 10094 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10095 // Emit device ID if any. 10096 llvm::Value *DeviceID = nullptr; 10097 if (Device) { 10098 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10099 CGF.Int64Ty, /*isSigned=*/true); 10100 } else { 10101 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10102 } 10103 10104 // Emit the number of elements in the offloading arrays. 10105 llvm::Constant *PointerNum = 10106 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10107 10108 llvm::Value *OffloadingArgs[] = {DeviceID, 10109 PointerNum, 10110 InputInfo.BasePointersArray.getPointer(), 10111 InputInfo.PointersArray.getPointer(), 10112 InputInfo.SizesArray.getPointer(), 10113 MapTypesArray}; 10114 10115 // Select the right runtime function call for each expected standalone 10116 // directive. 10117 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10118 OpenMPRTLFunction RTLFn; 10119 switch (D.getDirectiveKind()) { 10120 case OMPD_target_enter_data: 10121 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_begin_nowait 10122 : OMPRTL__tgt_target_data_begin; 10123 break; 10124 case OMPD_target_exit_data: 10125 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10126 : OMPRTL__tgt_target_data_end; 10127 break; 10128 case OMPD_target_update: 10129 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10130 : OMPRTL__tgt_target_data_update; 10131 break; 10132 case OMPD_parallel: 10133 case OMPD_for: 10134 case OMPD_parallel_for: 10135 case OMPD_parallel_master: 10136 case OMPD_parallel_sections: 10137 case OMPD_for_simd: 10138 case OMPD_parallel_for_simd: 10139 case OMPD_cancel: 10140 case OMPD_cancellation_point: 10141 case OMPD_ordered: 10142 case OMPD_threadprivate: 10143 case OMPD_allocate: 10144 case OMPD_task: 10145 case OMPD_simd: 10146 case OMPD_sections: 10147 case OMPD_section: 10148 case OMPD_single: 10149 case OMPD_master: 10150 case OMPD_critical: 10151 case OMPD_taskyield: 10152 case OMPD_barrier: 10153 case OMPD_taskwait: 10154 case OMPD_taskgroup: 10155 case OMPD_atomic: 10156 case OMPD_flush: 10157 case OMPD_teams: 10158 case OMPD_target_data: 10159 case OMPD_distribute: 10160 case OMPD_distribute_simd: 10161 case OMPD_distribute_parallel_for: 10162 case OMPD_distribute_parallel_for_simd: 10163 case OMPD_teams_distribute: 10164 case OMPD_teams_distribute_simd: 10165 case OMPD_teams_distribute_parallel_for: 10166 case OMPD_teams_distribute_parallel_for_simd: 10167 case OMPD_declare_simd: 10168 case OMPD_declare_variant: 10169 case OMPD_declare_target: 10170 case OMPD_end_declare_target: 10171 case OMPD_declare_reduction: 10172 case OMPD_declare_mapper: 10173 case OMPD_taskloop: 10174 case OMPD_taskloop_simd: 10175 case OMPD_master_taskloop: 10176 case OMPD_master_taskloop_simd: 10177 case OMPD_parallel_master_taskloop: 10178 case OMPD_parallel_master_taskloop_simd: 10179 case OMPD_target: 10180 case OMPD_target_simd: 10181 case OMPD_target_teams_distribute: 10182 case OMPD_target_teams_distribute_simd: 10183 case 
OMPD_target_teams_distribute_parallel_for: 10184 case OMPD_target_teams_distribute_parallel_for_simd: 10185 case OMPD_target_teams: 10186 case OMPD_target_parallel: 10187 case OMPD_target_parallel_for: 10188 case OMPD_target_parallel_for_simd: 10189 case OMPD_requires: 10190 case OMPD_unknown: 10191 llvm_unreachable("Unexpected standalone target data directive."); 10192 break; 10193 } 10194 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10195 }; 10196 10197 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10198 CodeGenFunction &CGF, PrePostActionTy &) { 10199 // Fill up the arrays with all the mapped variables. 10200 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10201 MappableExprsHandler::MapValuesArrayTy Pointers; 10202 MappableExprsHandler::MapValuesArrayTy Sizes; 10203 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10204 10205 // Get map clause information. 10206 MappableExprsHandler MEHandler(D, CGF); 10207 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10208 10209 TargetDataInfo Info; 10210 // Fill up the arrays and create the arguments. 
10211 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10212 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10213 Info.PointersArray, Info.SizesArray, 10214 Info.MapTypesArray, Info); 10215 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10216 InputInfo.BasePointersArray = 10217 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10218 InputInfo.PointersArray = 10219 Address(Info.PointersArray, CGM.getPointerAlign()); 10220 InputInfo.SizesArray = 10221 Address(Info.SizesArray, CGM.getPointerAlign()); 10222 MapTypesArray = Info.MapTypesArray; 10223 if (D.hasClausesOfKind<OMPDependClause>()) 10224 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10225 else 10226 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10227 }; 10228 10229 if (IfCond) { 10230 emitIfClause(CGF, IfCond, TargetThenGen, 10231 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10232 } else { 10233 RegionCodeGenTy ThenRCG(TargetThenGen); 10234 ThenRCG(CGF); 10235 } 10236 } 10237 10238 namespace { 10239 /// Kind of parameter in a function with 'declare simd' directive. 10240 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10241 /// Attribute set of the parameter. 10242 struct ParamAttrTy { 10243 ParamKindTy Kind = Vector; 10244 llvm::APSInt StrideOrArg; 10245 llvm::APSInt Alignment; 10246 }; 10247 } // namespace 10248 10249 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10250 ArrayRef<ParamAttrTy> ParamAttrs) { 10251 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10252 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10253 // of that clause. The VLEN value must be power of 2. 10254 // In other case the notion of the function`s "characteristic data type" (CDT) 10255 // is used to compute the vector length. 10256 // CDT is defined in the following order: 10257 // a) For non-void function, the CDT is the return type. 
10258 // b) If the function has any non-uniform, non-linear parameters, then the 10259 // CDT is the type of the first such parameter. 10260 // c) If the CDT determined by a) or b) above is struct, union, or class 10261 // type which is pass-by-value (except for the type that maps to the 10262 // built-in complex data type), the characteristic data type is int. 10263 // d) If none of the above three cases is applicable, the CDT is int. 10264 // The VLEN is then determined based on the CDT and the size of vector 10265 // register of that ISA for which current vector version is generated. The 10266 // VLEN is computed using the formula below: 10267 // VLEN = sizeof(vector_register) / sizeof(CDT), 10268 // where vector register size specified in section 3.2.1 Registers and the 10269 // Stack Frame of original AMD64 ABI document. 10270 QualType RetType = FD->getReturnType(); 10271 if (RetType.isNull()) 10272 return 0; 10273 ASTContext &C = FD->getASTContext(); 10274 QualType CDT; 10275 if (!RetType.isNull() && !RetType->isVoidType()) { 10276 CDT = RetType; 10277 } else { 10278 unsigned Offset = 0; 10279 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10280 if (ParamAttrs[Offset].Kind == Vector) 10281 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10282 ++Offset; 10283 } 10284 if (CDT.isNull()) { 10285 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10286 if (ParamAttrs[I + Offset].Kind == Vector) { 10287 CDT = FD->getParamDecl(I)->getType(); 10288 break; 10289 } 10290 } 10291 } 10292 } 10293 if (CDT.isNull()) 10294 CDT = C.IntTy; 10295 CDT = CDT->getCanonicalTypeUnqualified(); 10296 if (CDT->isRecordType() || CDT->isUnionType()) 10297 CDT = C.IntTy; 10298 return C.getTypeSize(CDT); 10299 } 10300 10301 static void 10302 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10303 const llvm::APSInt &VLENVal, 10304 ArrayRef<ParamAttrTy> ParamAttrs, 10305 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10306 struct ISADataTy { 
10307 char ISA; 10308 unsigned VecRegSize; 10309 }; 10310 ISADataTy ISAData[] = { 10311 { 10312 'b', 128 10313 }, // SSE 10314 { 10315 'c', 256 10316 }, // AVX 10317 { 10318 'd', 256 10319 }, // AVX2 10320 { 10321 'e', 512 10322 }, // AVX512 10323 }; 10324 llvm::SmallVector<char, 2> Masked; 10325 switch (State) { 10326 case OMPDeclareSimdDeclAttr::BS_Undefined: 10327 Masked.push_back('N'); 10328 Masked.push_back('M'); 10329 break; 10330 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10331 Masked.push_back('N'); 10332 break; 10333 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10334 Masked.push_back('M'); 10335 break; 10336 } 10337 for (char Mask : Masked) { 10338 for (const ISADataTy &Data : ISAData) { 10339 SmallString<256> Buffer; 10340 llvm::raw_svector_ostream Out(Buffer); 10341 Out << "_ZGV" << Data.ISA << Mask; 10342 if (!VLENVal) { 10343 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10344 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10345 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10346 } else { 10347 Out << VLENVal; 10348 } 10349 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10350 switch (ParamAttr.Kind){ 10351 case LinearWithVarStride: 10352 Out << 's' << ParamAttr.StrideOrArg; 10353 break; 10354 case Linear: 10355 Out << 'l'; 10356 if (!!ParamAttr.StrideOrArg) 10357 Out << ParamAttr.StrideOrArg; 10358 break; 10359 case Uniform: 10360 Out << 'u'; 10361 break; 10362 case Vector: 10363 Out << 'v'; 10364 break; 10365 } 10366 if (!!ParamAttr.Alignment) 10367 Out << 'a' << ParamAttr.Alignment; 10368 } 10369 Out << '_' << Fn->getName(); 10370 Fn->addFnAttr(Out.str()); 10371 } 10372 } 10373 } 10374 10375 // This are the Functions that are needed to mangle the name of the 10376 // vector functions generated by the compiler, according to the rules 10377 // defined in the "Vector Function ABI specifications for AArch64", 10378 // available at 10379 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10380 10381 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10382 /// 10383 /// TODO: Need to implement the behavior for reference marked with a 10384 /// var or no linear modifiers (1.b in the section). For this, we 10385 /// need to extend ParamKindTy to support the linear modifiers. 10386 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10387 QT = QT.getCanonicalType(); 10388 10389 if (QT->isVoidType()) 10390 return false; 10391 10392 if (Kind == ParamKindTy::Uniform) 10393 return false; 10394 10395 if (Kind == ParamKindTy::Linear) 10396 return false; 10397 10398 // TODO: Handle linear references with modifiers 10399 10400 if (Kind == ParamKindTy::LinearWithVarStride) 10401 return false; 10402 10403 return true; 10404 } 10405 10406 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10407 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10408 QT = QT.getCanonicalType(); 10409 unsigned Size = C.getTypeSize(QT); 10410 10411 // Only scalars and complex within 16 bytes wide set PVB to true. 10412 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10413 return false; 10414 10415 if (QT->isFloatingType()) 10416 return true; 10417 10418 if (QT->isIntegerType()) 10419 return true; 10420 10421 if (QT->isPointerType()) 10422 return true; 10423 10424 // TODO: Add support for complex types (section 3.1.2, item 2). 10425 10426 return false; 10427 } 10428 10429 /// Computes the lane size (LS) of a return type or of an input parameter, 10430 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10431 /// TODO: Add support for references, section 3.2.1, item 1. 
10432 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10433 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10434 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10435 if (getAArch64PBV(PTy, C)) 10436 return C.getTypeSize(PTy); 10437 } 10438 if (getAArch64PBV(QT, C)) 10439 return C.getTypeSize(QT); 10440 10441 return C.getTypeSize(C.getUIntPtrType()); 10442 } 10443 10444 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10445 // signature of the scalar function, as defined in 3.2.2 of the 10446 // AAVFABI. 10447 static std::tuple<unsigned, unsigned, bool> 10448 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10449 QualType RetType = FD->getReturnType().getCanonicalType(); 10450 10451 ASTContext &C = FD->getASTContext(); 10452 10453 bool OutputBecomesInput = false; 10454 10455 llvm::SmallVector<unsigned, 8> Sizes; 10456 if (!RetType->isVoidType()) { 10457 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10458 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10459 OutputBecomesInput = true; 10460 } 10461 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10462 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10463 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10464 } 10465 10466 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10467 // The LS of a function parameter / return value can only be a power 10468 // of 2, starting from 8 bits, up to 128. 
10469 assert(std::all_of(Sizes.begin(), Sizes.end(), 10470 [](unsigned Size) { 10471 return Size == 8 || Size == 16 || Size == 32 || 10472 Size == 64 || Size == 128; 10473 }) && 10474 "Invalid size"); 10475 10476 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10477 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10478 OutputBecomesInput); 10479 } 10480 10481 /// Mangle the parameter part of the vector function name according to 10482 /// their OpenMP classification. The mangling function is defined in 10483 /// section 3.5 of the AAVFABI. 10484 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10485 SmallString<256> Buffer; 10486 llvm::raw_svector_ostream Out(Buffer); 10487 for (const auto &ParamAttr : ParamAttrs) { 10488 switch (ParamAttr.Kind) { 10489 case LinearWithVarStride: 10490 Out << "ls" << ParamAttr.StrideOrArg; 10491 break; 10492 case Linear: 10493 Out << 'l'; 10494 // Don't print the step value if it is not present or if it is 10495 // equal to 1. 10496 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10497 Out << ParamAttr.StrideOrArg; 10498 break; 10499 case Uniform: 10500 Out << 'u'; 10501 break; 10502 case Vector: 10503 Out << 'v'; 10504 break; 10505 } 10506 10507 if (!!ParamAttr.Alignment) 10508 Out << 'a' << ParamAttr.Alignment; 10509 } 10510 10511 return std::string(Out.str()); 10512 } 10513 10514 // Function used to add the attribute. The parameter `VLEN` is 10515 // templated to allow the use of "x" when targeting scalable functions 10516 // for SVE. 
10517 template <typename T> 10518 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10519 char ISA, StringRef ParSeq, 10520 StringRef MangledName, bool OutputBecomesInput, 10521 llvm::Function *Fn) { 10522 SmallString<256> Buffer; 10523 llvm::raw_svector_ostream Out(Buffer); 10524 Out << Prefix << ISA << LMask << VLEN; 10525 if (OutputBecomesInput) 10526 Out << "v"; 10527 Out << ParSeq << "_" << MangledName; 10528 Fn->addFnAttr(Out.str()); 10529 } 10530 10531 // Helper function to generate the Advanced SIMD names depending on 10532 // the value of the NDS when simdlen is not present. 10533 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10534 StringRef Prefix, char ISA, 10535 StringRef ParSeq, StringRef MangledName, 10536 bool OutputBecomesInput, 10537 llvm::Function *Fn) { 10538 switch (NDS) { 10539 case 8: 10540 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10541 OutputBecomesInput, Fn); 10542 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10543 OutputBecomesInput, Fn); 10544 break; 10545 case 16: 10546 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10547 OutputBecomesInput, Fn); 10548 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10549 OutputBecomesInput, Fn); 10550 break; 10551 case 32: 10552 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10553 OutputBecomesInput, Fn); 10554 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10555 OutputBecomesInput, Fn); 10556 break; 10557 case 64: 10558 case 128: 10559 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10560 OutputBecomesInput, Fn); 10561 break; 10562 default: 10563 llvm_unreachable("Scalar type is too wide."); 10564 } 10565 } 10566 10567 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10568 static void emitAArch64DeclareSimdFunction( 10569 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10570 ArrayRef<ParamAttrTy> ParamAttrs, 10571 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10572 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10573 10574 // Get basic data for building the vector signature. 10575 const auto Data = getNDSWDS(FD, ParamAttrs); 10576 const unsigned NDS = std::get<0>(Data); 10577 const unsigned WDS = std::get<1>(Data); 10578 const bool OutputBecomesInput = std::get<2>(Data); 10579 10580 // Check the values provided via `simdlen` by the user. 10581 // 1. A `simdlen(1)` doesn't produce vector signatures, 10582 if (UserVLEN == 1) { 10583 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10584 DiagnosticsEngine::Warning, 10585 "The clause simdlen(1) has no effect when targeting aarch64."); 10586 CGM.getDiags().Report(SLoc, DiagID); 10587 return; 10588 } 10589 10590 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10591 // Advanced SIMD output. 10592 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10593 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10594 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10595 "power of 2 when targeting Advanced SIMD."); 10596 CGM.getDiags().Report(SLoc, DiagID); 10597 return; 10598 } 10599 10600 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10601 // limits. 10602 if (ISA == 's' && UserVLEN != 0) { 10603 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10604 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10605 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10606 "lanes in the architectural constraints " 10607 "for SVE (min is 128-bit, max is " 10608 "2048-bit, by steps of 128-bit)"); 10609 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10610 return; 10611 } 10612 } 10613 10614 // Sort out parameter sequence. 
10615 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10616 StringRef Prefix = "_ZGV"; 10617 // Generate simdlen from user input (if any). 10618 if (UserVLEN) { 10619 if (ISA == 's') { 10620 // SVE generates only a masked function. 10621 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10622 OutputBecomesInput, Fn); 10623 } else { 10624 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10625 // Advanced SIMD generates one or two functions, depending on 10626 // the `[not]inbranch` clause. 10627 switch (State) { 10628 case OMPDeclareSimdDeclAttr::BS_Undefined: 10629 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10630 OutputBecomesInput, Fn); 10631 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10632 OutputBecomesInput, Fn); 10633 break; 10634 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10635 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10636 OutputBecomesInput, Fn); 10637 break; 10638 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10639 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10640 OutputBecomesInput, Fn); 10641 break; 10642 } 10643 } 10644 } else { 10645 // If no user simdlen is provided, follow the AAVFABI rules for 10646 // generating the vector length. 10647 if (ISA == 's') { 10648 // SVE, section 3.4.1, item 1. 10649 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10650 OutputBecomesInput, Fn); 10651 } else { 10652 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10653 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10654 // two vector names depending on the use of the clause 10655 // `[not]inbranch`. 
10656 switch (State) { 10657 case OMPDeclareSimdDeclAttr::BS_Undefined: 10658 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10659 OutputBecomesInput, Fn); 10660 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10661 OutputBecomesInput, Fn); 10662 break; 10663 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10664 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10665 OutputBecomesInput, Fn); 10666 break; 10667 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10668 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10669 OutputBecomesInput, Fn); 10670 break; 10671 } 10672 } 10673 } 10674 } 10675 10676 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10677 llvm::Function *Fn) { 10678 ASTContext &C = CGM.getContext(); 10679 FD = FD->getMostRecentDecl(); 10680 // Map params to their positions in function decl. 10681 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10682 if (isa<CXXMethodDecl>(FD)) 10683 ParamPositions.try_emplace(FD, 0); 10684 unsigned ParamPos = ParamPositions.size(); 10685 for (const ParmVarDecl *P : FD->parameters()) { 10686 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10687 ++ParamPos; 10688 } 10689 while (FD) { 10690 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10691 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10692 // Mark uniform parameters. 10693 for (const Expr *E : Attr->uniforms()) { 10694 E = E->IgnoreParenImpCasts(); 10695 unsigned Pos; 10696 if (isa<CXXThisExpr>(E)) { 10697 Pos = ParamPositions[FD]; 10698 } else { 10699 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10700 ->getCanonicalDecl(); 10701 Pos = ParamPositions[PVD]; 10702 } 10703 ParamAttrs[Pos].Kind = Uniform; 10704 } 10705 // Get alignment info. 
10706 auto NI = Attr->alignments_begin(); 10707 for (const Expr *E : Attr->aligneds()) { 10708 E = E->IgnoreParenImpCasts(); 10709 unsigned Pos; 10710 QualType ParmTy; 10711 if (isa<CXXThisExpr>(E)) { 10712 Pos = ParamPositions[FD]; 10713 ParmTy = E->getType(); 10714 } else { 10715 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10716 ->getCanonicalDecl(); 10717 Pos = ParamPositions[PVD]; 10718 ParmTy = PVD->getType(); 10719 } 10720 ParamAttrs[Pos].Alignment = 10721 (*NI) 10722 ? (*NI)->EvaluateKnownConstInt(C) 10723 : llvm::APSInt::getUnsigned( 10724 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10725 .getQuantity()); 10726 ++NI; 10727 } 10728 // Mark linear parameters. 10729 auto SI = Attr->steps_begin(); 10730 auto MI = Attr->modifiers_begin(); 10731 for (const Expr *E : Attr->linears()) { 10732 E = E->IgnoreParenImpCasts(); 10733 unsigned Pos; 10734 if (isa<CXXThisExpr>(E)) { 10735 Pos = ParamPositions[FD]; 10736 } else { 10737 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10738 ->getCanonicalDecl(); 10739 Pos = ParamPositions[PVD]; 10740 } 10741 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10742 ParamAttr.Kind = Linear; 10743 if (*SI) { 10744 Expr::EvalResult Result; 10745 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10746 if (const auto *DRE = 10747 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10748 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10749 ParamAttr.Kind = LinearWithVarStride; 10750 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10751 ParamPositions[StridePVD->getCanonicalDecl()]); 10752 } 10753 } 10754 } else { 10755 ParamAttr.StrideOrArg = Result.Val.getInt(); 10756 } 10757 } 10758 ++SI; 10759 ++MI; 10760 } 10761 llvm::APSInt VLENVal; 10762 SourceLocation ExprLoc; 10763 const Expr *VLENExpr = Attr->getSimdlen(); 10764 if (VLENExpr) { 10765 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10766 ExprLoc = VLENExpr->getExprLoc(); 10767 } 10768 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10769 if (CGM.getTriple().isX86()) { 10770 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10771 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10772 unsigned VLEN = VLENVal.getExtValue(); 10773 StringRef MangledName = Fn->getName(); 10774 if (CGM.getTarget().hasFeature("sve")) 10775 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10776 MangledName, 's', 128, Fn, ExprLoc); 10777 if (CGM.getTarget().hasFeature("neon")) 10778 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10779 MangledName, 'n', 128, Fn, ExprLoc); 10780 } 10781 } 10782 FD = FD->getPreviousDecl(); 10783 } 10784 } 10785 10786 namespace { 10787 /// Cleanup action for doacross support. 10788 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10789 public: 10790 static const int DoacrossFinArgs = 2; 10791 10792 private: 10793 llvm::FunctionCallee RTLFn; 10794 llvm::Value *Args[DoacrossFinArgs]; 10795 10796 public: 10797 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10798 ArrayRef<llvm::Value *> CallArgs) 10799 : RTLFn(RTLFn) { 10800 assert(CallArgs.size() == DoacrossFinArgs); 10801 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10802 } 10803 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10804 if (!CGF.HaveInsertPoint()) 10805 return; 10806 CGF.EmitRuntimeCall(RTLFn, Args); 10807 } 10808 }; 10809 } // namespace 10810 10811 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10812 const OMPLoopDirective &D, 10813 ArrayRef<Expr *> NumIterations) { 10814 if (!CGF.HaveInsertPoint()) 10815 return; 10816 10817 ASTContext &C = CGM.getContext(); 10818 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10819 RecordDecl *RD; 10820 if (KmpDimTy.isNull()) { 10821 // Build struct kmp_dim { // loop bounds info casted to kmp_int64 10822 // kmp_int64 lo; // lower 10823 // kmp_int64 up; // upper 10824 // 
kmp_int64 st; // stride 10825 // }; 10826 RD = C.buildImplicitRecord("kmp_dim"); 10827 RD->startDefinition(); 10828 addFieldToRecordDecl(C, RD, Int64Ty); 10829 addFieldToRecordDecl(C, RD, Int64Ty); 10830 addFieldToRecordDecl(C, RD, Int64Ty); 10831 RD->completeDefinition(); 10832 KmpDimTy = C.getRecordType(RD); 10833 } else { 10834 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10835 } 10836 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10837 QualType ArrayTy = 10838 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 10839 10840 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10841 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10842 enum { LowerFD = 0, UpperFD, StrideFD }; 10843 // Fill dims with data. 10844 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10845 LValue DimsLVal = CGF.MakeAddrLValue( 10846 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10847 // dims.upper = num_iterations; 10848 LValue UpperLVal = CGF.EmitLValueForField( 10849 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10850 llvm::Value *NumIterVal = 10851 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10852 D.getNumIterations()->getType(), Int64Ty, 10853 D.getNumIterations()->getExprLoc()); 10854 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10855 // dims.stride = 1; 10856 LValue StrideLVal = CGF.EmitLValueForField( 10857 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10858 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10859 StrideLVal); 10860 } 10861 10862 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10863 // kmp_int32 num_dims, struct kmp_dim * dims); 10864 llvm::Value *Args[] = { 10865 emitUpdateLocation(CGF, D.getBeginLoc()), 10866 getThreadID(CGF, D.getBeginLoc()), 10867 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10868 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10869 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 
10870 CGM.VoidPtrTy)}; 10871 10872 llvm::FunctionCallee RTLFn = 10873 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10874 CGF.EmitRuntimeCall(RTLFn, Args); 10875 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10876 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10877 llvm::FunctionCallee FiniRTLFn = 10878 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10879 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10880 llvm::makeArrayRef(FiniArgs)); 10881 } 10882 10883 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10884 const OMPDependClause *C) { 10885 QualType Int64Ty = 10886 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10887 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10888 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10889 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 10890 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10891 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10892 const Expr *CounterVal = C->getLoopData(I); 10893 assert(CounterVal); 10894 llvm::Value *CntVal = CGF.EmitScalarConversion( 10895 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10896 CounterVal->getExprLoc()); 10897 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10898 /*Volatile=*/false, Int64Ty); 10899 } 10900 llvm::Value *Args[] = { 10901 emitUpdateLocation(CGF, C->getBeginLoc()), 10902 getThreadID(CGF, C->getBeginLoc()), 10903 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10904 llvm::FunctionCallee RTLFn; 10905 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10906 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10907 } else { 10908 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10909 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10910 } 10911 CGF.EmitRuntimeCall(RTLFn, Args); 10912 } 10913 10914 void CGOpenMPRuntime::emitCall(CodeGenFunction 
&CGF, SourceLocation Loc, 10915 llvm::FunctionCallee Callee, 10916 ArrayRef<llvm::Value *> Args) const { 10917 assert(Loc.isValid() && "Outlined function call location must be valid."); 10918 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10919 10920 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10921 if (Fn->doesNotThrow()) { 10922 CGF.EmitNounwindRuntimeCall(Fn, Args); 10923 return; 10924 } 10925 } 10926 CGF.EmitRuntimeCall(Callee, Args); 10927 } 10928 10929 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10930 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10931 ArrayRef<llvm::Value *> Args) const { 10932 emitCall(CGF, Loc, OutlinedFn, Args); 10933 } 10934 10935 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 10936 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 10937 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 10938 HasEmittedDeclareTargetRegion = true; 10939 } 10940 10941 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10942 const VarDecl *NativeParam, 10943 const VarDecl *TargetParam) const { 10944 return CGF.GetAddrOfLocalVar(NativeParam); 10945 } 10946 10947 namespace { 10948 /// Cleanup action for allocate support. 
10949 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10950 public: 10951 static const int CleanupArgs = 3; 10952 10953 private: 10954 llvm::FunctionCallee RTLFn; 10955 llvm::Value *Args[CleanupArgs]; 10956 10957 public: 10958 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 10959 ArrayRef<llvm::Value *> CallArgs) 10960 : RTLFn(RTLFn) { 10961 assert(CallArgs.size() == CleanupArgs && 10962 "Size of arguments does not match."); 10963 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10964 } 10965 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10966 if (!CGF.HaveInsertPoint()) 10967 return; 10968 CGF.EmitRuntimeCall(RTLFn, Args); 10969 } 10970 }; 10971 } // namespace 10972 10973 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 10974 const VarDecl *VD) { 10975 if (!VD) 10976 return Address::invalid(); 10977 const VarDecl *CVD = VD->getCanonicalDecl(); 10978 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 10979 return Address::invalid(); 10980 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 10981 // Use the default allocation. 
10982 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 10983 !AA->getAllocator()) 10984 return Address::invalid(); 10985 llvm::Value *Size; 10986 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 10987 if (CVD->getType()->isVariablyModifiedType()) { 10988 Size = CGF.getTypeSize(CVD->getType()); 10989 // Align the size: ((size + align - 1) / align) * align 10990 Size = CGF.Builder.CreateNUWAdd( 10991 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 10992 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 10993 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 10994 } else { 10995 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 10996 Size = CGM.getSize(Sz.alignTo(Align)); 10997 } 10998 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 10999 assert(AA->getAllocator() && 11000 "Expected allocator expression for non-default allocator."); 11001 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11002 // According to the standard, the original allocator type is a enum (integer). 11003 // Convert to pointer type, if required. 
11004 if (Allocator->getType()->isIntegerTy()) 11005 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11006 else if (Allocator->getType()->isPointerTy()) 11007 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11008 CGM.VoidPtrTy); 11009 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11010 11011 llvm::Value *Addr = 11012 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11013 getName({CVD->getName(), ".void.addr"})); 11014 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11015 Allocator}; 11016 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11017 11018 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11019 llvm::makeArrayRef(FiniArgs)); 11020 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11021 Addr, 11022 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11023 getName({CVD->getName(), ".addr"})); 11024 return Address(Addr, Align); 11025 } 11026 11027 namespace { 11028 using OMPContextSelectorData = 11029 OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>; 11030 using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>; 11031 } // anonymous namespace 11032 11033 /// Checks current context and returns true if it matches the context selector. 11034 template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx, 11035 typename... Arguments> 11036 static bool checkContext(const OMPContextSelectorData &Data, 11037 Arguments... Params) { 11038 assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown && 11039 "Unknown context selector or context selector set."); 11040 return false; 11041 } 11042 11043 /// Checks for implementation={vendor(<vendor>)} context selector. 11044 /// \returns true iff <vendor>="llvm", false otherwise. 
11045 template <> 11046 bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>( 11047 const OMPContextSelectorData &Data) { 11048 return llvm::all_of(Data.Names, 11049 [](StringRef S) { return !S.compare_lower("llvm"); }); 11050 } 11051 11052 /// Checks for device={kind(<kind>)} context selector. 11053 /// \returns true if <kind>="host" and compilation is for host. 11054 /// true if <kind>="nohost" and compilation is for device. 11055 /// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU. 11056 /// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN. 11057 /// false otherwise. 11058 template <> 11059 bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>( 11060 const OMPContextSelectorData &Data, CodeGenModule &CGM) { 11061 for (StringRef Name : Data.Names) { 11062 if (!Name.compare_lower("host")) { 11063 if (CGM.getLangOpts().OpenMPIsDevice) 11064 return false; 11065 continue; 11066 } 11067 if (!Name.compare_lower("nohost")) { 11068 if (!CGM.getLangOpts().OpenMPIsDevice) 11069 return false; 11070 continue; 11071 } 11072 switch (CGM.getTriple().getArch()) { 11073 case llvm::Triple::arm: 11074 case llvm::Triple::armeb: 11075 case llvm::Triple::aarch64: 11076 case llvm::Triple::aarch64_be: 11077 case llvm::Triple::aarch64_32: 11078 case llvm::Triple::ppc: 11079 case llvm::Triple::ppc64: 11080 case llvm::Triple::ppc64le: 11081 case llvm::Triple::x86: 11082 case llvm::Triple::x86_64: 11083 if (Name.compare_lower("cpu")) 11084 return false; 11085 break; 11086 case llvm::Triple::amdgcn: 11087 case llvm::Triple::nvptx: 11088 case llvm::Triple::nvptx64: 11089 if (Name.compare_lower("gpu")) 11090 return false; 11091 break; 11092 case llvm::Triple::UnknownArch: 11093 case llvm::Triple::arc: 11094 case llvm::Triple::avr: 11095 case llvm::Triple::bpfel: 11096 case llvm::Triple::bpfeb: 11097 case llvm::Triple::hexagon: 11098 case llvm::Triple::mips: 11099 case llvm::Triple::mipsel: 11100 case llvm::Triple::mips64: 11101 case 
llvm::Triple::mips64el: 11102 case llvm::Triple::msp430: 11103 case llvm::Triple::r600: 11104 case llvm::Triple::riscv32: 11105 case llvm::Triple::riscv64: 11106 case llvm::Triple::sparc: 11107 case llvm::Triple::sparcv9: 11108 case llvm::Triple::sparcel: 11109 case llvm::Triple::systemz: 11110 case llvm::Triple::tce: 11111 case llvm::Triple::tcele: 11112 case llvm::Triple::thumb: 11113 case llvm::Triple::thumbeb: 11114 case llvm::Triple::xcore: 11115 case llvm::Triple::le32: 11116 case llvm::Triple::le64: 11117 case llvm::Triple::amdil: 11118 case llvm::Triple::amdil64: 11119 case llvm::Triple::hsail: 11120 case llvm::Triple::hsail64: 11121 case llvm::Triple::spir: 11122 case llvm::Triple::spir64: 11123 case llvm::Triple::kalimba: 11124 case llvm::Triple::shave: 11125 case llvm::Triple::lanai: 11126 case llvm::Triple::wasm32: 11127 case llvm::Triple::wasm64: 11128 case llvm::Triple::renderscript32: 11129 case llvm::Triple::renderscript64: 11130 case llvm::Triple::ve: 11131 return false; 11132 } 11133 } 11134 return true; 11135 } 11136 11137 static bool matchesContext(CodeGenModule &CGM, 11138 const CompleteOMPContextSelectorData &ContextData) { 11139 for (const OMPContextSelectorData &Data : ContextData) { 11140 switch (Data.Ctx) { 11141 case OMP_CTX_vendor: 11142 assert(Data.CtxSet == OMP_CTX_SET_implementation && 11143 "Expected implementation context selector set."); 11144 if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data)) 11145 return false; 11146 break; 11147 case OMP_CTX_kind: 11148 assert(Data.CtxSet == OMP_CTX_SET_device && 11149 "Expected device context selector set."); 11150 if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data, 11151 CGM)) 11152 return false; 11153 break; 11154 case OMP_CTX_unknown: 11155 llvm_unreachable("Unknown context selector kind."); 11156 } 11157 } 11158 return true; 11159 } 11160 11161 static CompleteOMPContextSelectorData 11162 translateAttrToContextSelectorData(ASTContext &C, 11163 const 
OMPDeclareVariantAttr *A) { 11164 CompleteOMPContextSelectorData Data; 11165 for (unsigned I = 0, E = A->scores_size(); I < E; ++I) { 11166 Data.emplace_back(); 11167 auto CtxSet = static_cast<OpenMPContextSelectorSetKind>( 11168 *std::next(A->ctxSelectorSets_begin(), I)); 11169 auto Ctx = static_cast<OpenMPContextSelectorKind>( 11170 *std::next(A->ctxSelectors_begin(), I)); 11171 Data.back().CtxSet = CtxSet; 11172 Data.back().Ctx = Ctx; 11173 const Expr *Score = *std::next(A->scores_begin(), I); 11174 Data.back().Score = Score->EvaluateKnownConstInt(C); 11175 switch (Ctx) { 11176 case OMP_CTX_vendor: 11177 assert(CtxSet == OMP_CTX_SET_implementation && 11178 "Expected implementation context selector set."); 11179 Data.back().Names = 11180 llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end()); 11181 break; 11182 case OMP_CTX_kind: 11183 assert(CtxSet == OMP_CTX_SET_device && 11184 "Expected device context selector set."); 11185 Data.back().Names = 11186 llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end()); 11187 break; 11188 case OMP_CTX_unknown: 11189 llvm_unreachable("Unknown context selector kind."); 11190 } 11191 } 11192 return Data; 11193 } 11194 11195 static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS, 11196 const CompleteOMPContextSelectorData &RHS) { 11197 llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData; 11198 for (const OMPContextSelectorData &D : RHS) { 11199 auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx)); 11200 Pair.getSecond().insert(D.Names.begin(), D.Names.end()); 11201 } 11202 bool AllSetsAreEqual = true; 11203 for (const OMPContextSelectorData &D : LHS) { 11204 auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx)); 11205 if (It == RHSData.end()) 11206 return false; 11207 if (D.Names.size() > It->getSecond().size()) 11208 return false; 11209 if (llvm::set_union(It->getSecond(), D.Names)) 11210 return false; 11211 AllSetsAreEqual = 11212 AllSetsAreEqual && 
(D.Names.size() == It->getSecond().size()); 11213 } 11214 11215 return LHS.size() != RHS.size() || !AllSetsAreEqual; 11216 } 11217 11218 static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS, 11219 const CompleteOMPContextSelectorData &RHS) { 11220 // Score is calculated as sum of all scores + 1. 11221 llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); 11222 bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS); 11223 if (RHSIsSubsetOfLHS) { 11224 LHSScore = llvm::APSInt::get(0); 11225 } else { 11226 for (const OMPContextSelectorData &Data : LHS) { 11227 if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) { 11228 LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score; 11229 } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) { 11230 LHSScore += Data.Score.extend(LHSScore.getBitWidth()); 11231 } else { 11232 LHSScore += Data.Score; 11233 } 11234 } 11235 } 11236 llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); 11237 if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) { 11238 RHSScore = llvm::APSInt::get(0); 11239 } else { 11240 for (const OMPContextSelectorData &Data : RHS) { 11241 if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) { 11242 RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score; 11243 } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) { 11244 RHSScore += Data.Score.extend(RHSScore.getBitWidth()); 11245 } else { 11246 RHSScore += Data.Score; 11247 } 11248 } 11249 } 11250 return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0; 11251 } 11252 11253 /// Finds the variant function that matches current context with its context 11254 /// selector. 11255 static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM, 11256 const FunctionDecl *FD) { 11257 if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) 11258 return FD; 11259 // Iterate through all DeclareVariant attributes and check context selectors. 
11260 const OMPDeclareVariantAttr *TopMostAttr = nullptr; 11261 CompleteOMPContextSelectorData TopMostData; 11262 for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { 11263 CompleteOMPContextSelectorData Data = 11264 translateAttrToContextSelectorData(CGM.getContext(), A); 11265 if (!matchesContext(CGM, Data)) 11266 continue; 11267 // If the attribute matches the context, find the attribute with the highest 11268 // score. 11269 if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) { 11270 TopMostAttr = A; 11271 TopMostData.swap(Data); 11272 } 11273 } 11274 if (!TopMostAttr) 11275 return FD; 11276 return cast<FunctionDecl>( 11277 cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts()) 11278 ->getDecl()); 11279 } 11280 11281 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { 11282 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11283 // If the original function is defined already, use its definition. 11284 StringRef MangledName = CGM.getMangledName(GD); 11285 llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); 11286 if (Orig && !Orig->isDeclaration()) 11287 return false; 11288 const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D); 11289 // Emit original function if it does not have declare variant attribute or the 11290 // context does not match. 
11291 if (NewFD == D) 11292 return false; 11293 GlobalDecl NewGD = GD.getWithDecl(NewFD); 11294 if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { 11295 DeferredVariantFunction.erase(D); 11296 return true; 11297 } 11298 DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); 11299 return true; 11300 } 11301 11302 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11303 CodeGenModule &CGM, const OMPLoopDirective &S) 11304 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11305 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11306 if (!NeedToPush) 11307 return; 11308 NontemporalDeclsSet &DS = 11309 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11310 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11311 for (const Stmt *Ref : C->private_refs()) { 11312 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11313 const ValueDecl *VD; 11314 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11315 VD = DRE->getDecl(); 11316 } else { 11317 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11318 assert((ME->isImplicitCXXThis() || 11319 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11320 "Expected member of current class."); 11321 VD = ME->getMemberDecl(); 11322 } 11323 DS.insert(VD); 11324 } 11325 } 11326 } 11327 11328 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11329 if (!NeedToPush) 11330 return; 11331 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11332 } 11333 11334 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11335 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11336 11337 return llvm::any_of( 11338 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11339 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11340 } 11341 11342 void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( 11343 const OMPExecutableDirective &S, 11344 
llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) 11345 const { 11346 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs; 11347 // Vars in target/task regions must be excluded completely. 11348 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) || 11349 isOpenMPTaskingDirective(S.getDirectiveKind())) { 11350 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11351 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind()); 11352 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front()); 11353 for (const CapturedStmt::Capture &Cap : CS->captures()) { 11354 if (Cap.capturesVariable() || Cap.capturesVariableByCopy()) 11355 NeedToCheckForLPCs.insert(Cap.getCapturedVar()); 11356 } 11357 } 11358 // Exclude vars in private clauses. 11359 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 11360 for (const Expr *Ref : C->varlists()) { 11361 if (!Ref->getType()->isScalarType()) 11362 continue; 11363 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11364 if (!DRE) 11365 continue; 11366 NeedToCheckForLPCs.insert(DRE->getDecl()); 11367 } 11368 } 11369 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { 11370 for (const Expr *Ref : C->varlists()) { 11371 if (!Ref->getType()->isScalarType()) 11372 continue; 11373 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11374 if (!DRE) 11375 continue; 11376 NeedToCheckForLPCs.insert(DRE->getDecl()); 11377 } 11378 } 11379 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11380 for (const Expr *Ref : C->varlists()) { 11381 if (!Ref->getType()->isScalarType()) 11382 continue; 11383 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11384 if (!DRE) 11385 continue; 11386 NeedToCheckForLPCs.insert(DRE->getDecl()); 11387 } 11388 } 11389 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { 11390 for (const Expr *Ref : C->varlists()) { 11391 if (!Ref->getType()->isScalarType()) 
11392 continue; 11393 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11394 if (!DRE) 11395 continue; 11396 NeedToCheckForLPCs.insert(DRE->getDecl()); 11397 } 11398 } 11399 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { 11400 for (const Expr *Ref : C->varlists()) { 11401 if (!Ref->getType()->isScalarType()) 11402 continue; 11403 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); 11404 if (!DRE) 11405 continue; 11406 NeedToCheckForLPCs.insert(DRE->getDecl()); 11407 } 11408 } 11409 for (const Decl *VD : NeedToCheckForLPCs) { 11410 for (const LastprivateConditionalData &Data : 11411 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) { 11412 if (Data.DeclToUniqueName.count(VD) > 0) { 11413 if (!Data.Disabled) 11414 NeedToAddForLPCsAsDisabled.insert(VD); 11415 break; 11416 } 11417 } 11418 } 11419 } 11420 11421 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11422 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal) 11423 : CGM(CGF.CGM), 11424 Action((CGM.getLangOpts().OpenMP >= 50 && 11425 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11426 [](const OMPLastprivateClause *C) { 11427 return C->getKind() == 11428 OMPC_LASTPRIVATE_conditional; 11429 })) 11430 ? 
ActionToDo::PushAsLastprivateConditional 11431 : ActionToDo::DoNotPush) { 11432 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11433 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush) 11434 return; 11435 assert(Action == ActionToDo::PushAsLastprivateConditional && 11436 "Expected a push action."); 11437 LastprivateConditionalData &Data = 11438 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11439 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11440 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11441 continue; 11442 11443 for (const Expr *Ref : C->varlists()) { 11444 Data.DeclToUniqueName.insert(std::make_pair( 11445 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11446 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref)))); 11447 } 11448 } 11449 Data.IVLVal = IVLVal; 11450 Data.Fn = CGF.CurFn; 11451 } 11452 11453 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11454 CodeGenFunction &CGF, const OMPExecutableDirective &S) 11455 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) { 11456 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11457 if (CGM.getLangOpts().OpenMP < 50) 11458 return; 11459 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled; 11460 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled); 11461 if (!NeedToAddForLPCsAsDisabled.empty()) { 11462 Action = ActionToDo::DisableLastprivateConditional; 11463 LastprivateConditionalData &Data = 11464 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11465 for (const Decl *VD : NeedToAddForLPCsAsDisabled) 11466 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>())); 11467 Data.Fn = CGF.CurFn; 11468 Data.Disabled = true; 11469 } 11470 } 11471 11472 CGOpenMPRuntime::LastprivateConditionalRAII 11473 CGOpenMPRuntime::LastprivateConditionalRAII::disable( 11474 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11475 return 
LastprivateConditionalRAII(CGF, S); 11476 } 11477 11478 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11479 if (CGM.getLangOpts().OpenMP < 50) 11480 return; 11481 if (Action == ActionToDo::DisableLastprivateConditional) { 11482 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11483 "Expected list of disabled private vars."); 11484 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11485 } 11486 if (Action == ActionToDo::PushAsLastprivateConditional) { 11487 assert( 11488 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && 11489 "Expected list of lastprivate conditional vars."); 11490 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11491 } 11492 } 11493 11494 Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, 11495 const VarDecl *VD) { 11496 ASTContext &C = CGM.getContext(); 11497 auto I = LastprivateConditionalToTypes.find(CGF.CurFn); 11498 if (I == LastprivateConditionalToTypes.end()) 11499 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first; 11500 QualType NewType; 11501 const FieldDecl *VDField; 11502 const FieldDecl *FiredField; 11503 LValue BaseLVal; 11504 auto VI = I->getSecond().find(VD); 11505 if (VI == I->getSecond().end()) { 11506 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional"); 11507 RD->startDefinition(); 11508 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType()); 11509 FiredField = addFieldToRecordDecl(C, RD, C.CharTy); 11510 RD->completeDefinition(); 11511 NewType = C.getRecordType(RD); 11512 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11513 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11514 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11515 } else { 11516 NewType = std::get<0>(VI->getSecond()); 11517 VDField = std::get<1>(VI->getSecond()); 11518 FiredField = std::get<2>(VI->getSecond()); 
11519 BaseLVal = std::get<3>(VI->getSecond()); 11520 } 11521 LValue FiredLVal = 11522 CGF.EmitLValueForField(BaseLVal, FiredField); 11523 CGF.EmitStoreOfScalar( 11524 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11525 FiredLVal); 11526 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11527 } 11528 11529 namespace { 11530 /// Checks if the lastprivate conditional variable is referenced in LHS. 11531 class LastprivateConditionalRefChecker final 11532 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11533 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11534 const Expr *FoundE = nullptr; 11535 const Decl *FoundD = nullptr; 11536 StringRef UniqueDeclName; 11537 LValue IVLVal; 11538 llvm::Function *FoundFn = nullptr; 11539 SourceLocation Loc; 11540 11541 public: 11542 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11543 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11544 llvm::reverse(LPM)) { 11545 auto It = D.DeclToUniqueName.find(E->getDecl()); 11546 if (It == D.DeclToUniqueName.end()) 11547 continue; 11548 if (D.Disabled) 11549 return false; 11550 FoundE = E; 11551 FoundD = E->getDecl()->getCanonicalDecl(); 11552 UniqueDeclName = It->second; 11553 IVLVal = D.IVLVal; 11554 FoundFn = D.Fn; 11555 break; 11556 } 11557 return FoundE == E; 11558 } 11559 bool VisitMemberExpr(const MemberExpr *E) { 11560 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 11561 return false; 11562 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11563 llvm::reverse(LPM)) { 11564 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 11565 if (It == D.DeclToUniqueName.end()) 11566 continue; 11567 if (D.Disabled) 11568 return false; 11569 FoundE = E; 11570 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11571 UniqueDeclName = It->second; 11572 IVLVal = D.IVLVal; 11573 FoundFn = D.Fn; 11574 break; 11575 } 11576 return FoundE == E; 11577 } 11578 bool VisitStmt(const Stmt *S) { 11579 for (const Stmt 
*Child : S->children()) { 11580 if (!Child) 11581 continue; 11582 if (const auto *E = dyn_cast<Expr>(Child)) 11583 if (!E->isGLValue()) 11584 continue; 11585 if (Visit(Child)) 11586 return true; 11587 } 11588 return false; 11589 } 11590 explicit LastprivateConditionalRefChecker( 11591 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11592 : LPM(LPM) {} 11593 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 11594 getFoundData() const { 11595 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 11596 } 11597 }; 11598 } // namespace 11599 11600 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 11601 LValue IVLVal, 11602 StringRef UniqueDeclName, 11603 LValue LVal, 11604 SourceLocation Loc) { 11605 // Last updated loop counter for the lastprivate conditional var. 11606 // int<xx> last_iv = 0; 11607 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11608 llvm::Constant *LastIV = 11609 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 11610 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11611 IVLVal.getAlignment().getAsAlign()); 11612 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11613 11614 // Last value of the lastprivate conditional. 11615 // decltype(priv_a) last_a; 11616 llvm::Constant *Last = getOrCreateInternalVariable( 11617 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 11618 cast<llvm::GlobalVariable>(Last)->setAlignment( 11619 LVal.getAlignment().getAsAlign()); 11620 LValue LastLVal = 11621 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 11622 11623 // Global loop counter. Required to handle inner parallel-for regions. 
11624 // iv 11625 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 11626 11627 // #pragma omp critical(a) 11628 // if (last_iv <= iv) { 11629 // last_iv = iv; 11630 // last_a = priv_a; 11631 // } 11632 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 11633 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 11634 Action.Enter(CGF); 11635 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 11636 // (last_iv <= iv) ? Check if the variable is updated and store new 11637 // value in global var. 11638 llvm::Value *CmpRes; 11639 if (IVLVal.getType()->isSignedIntegerType()) { 11640 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 11641 } else { 11642 assert(IVLVal.getType()->isUnsignedIntegerType() && 11643 "Loop iteration variable must be integer."); 11644 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 11645 } 11646 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 11647 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 11648 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 11649 // { 11650 CGF.EmitBlock(ThenBB); 11651 11652 // last_iv = iv; 11653 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 11654 11655 // last_a = priv_a; 11656 switch (CGF.getEvaluationKind(LVal.getType())) { 11657 case TEK_Scalar: { 11658 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 11659 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 11660 break; 11661 } 11662 case TEK_Complex: { 11663 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 11664 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 11665 break; 11666 } 11667 case TEK_Aggregate: 11668 llvm_unreachable( 11669 "Aggregates are not supported in lastprivate conditional."); 11670 } 11671 // } 11672 CGF.EmitBranch(ExitBB); 11673 // There is no need to emit line number for unconditional branch. 
11674 (void)ApplyDebugLocation::CreateEmpty(CGF); 11675 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 11676 }; 11677 11678 if (CGM.getLangOpts().OpenMPSimd) { 11679 // Do not emit as a critical region as no parallel region could be emitted. 11680 RegionCodeGenTy ThenRCG(CodeGen); 11681 ThenRCG(CGF); 11682 } else { 11683 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 11684 } 11685 } 11686 11687 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 11688 const Expr *LHS) { 11689 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11690 return; 11691 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 11692 if (!Checker.Visit(LHS)) 11693 return; 11694 const Expr *FoundE; 11695 const Decl *FoundD; 11696 StringRef UniqueDeclName; 11697 LValue IVLVal; 11698 llvm::Function *FoundFn; 11699 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 11700 Checker.getFoundData(); 11701 if (FoundFn != CGF.CurFn) { 11702 // Special codegen for inner parallel regions. 
11703 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 11704 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 11705 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 11706 "Lastprivate conditional is not found in outer region."); 11707 QualType StructTy = std::get<0>(It->getSecond()); 11708 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 11709 LValue PrivLVal = CGF.EmitLValue(FoundE); 11710 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11711 PrivLVal.getAddress(CGF), 11712 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 11713 LValue BaseLVal = 11714 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 11715 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 11716 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 11717 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 11718 FiredLVal, llvm::AtomicOrdering::Unordered, 11719 /*IsVolatile=*/true, /*isInit=*/false); 11720 return; 11721 } 11722 11723 // Private address of the lastprivate conditional in the current context. 
11724 // priv_a 11725 LValue LVal = CGF.EmitLValue(FoundE); 11726 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 11727 FoundE->getExprLoc()); 11728 } 11729 11730 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 11731 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11732 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 11733 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 11734 return; 11735 auto Range = llvm::reverse(LastprivateConditionalStack); 11736 auto It = llvm::find_if( 11737 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 11738 if (It == Range.end() || It->Fn != CGF.CurFn) 11739 return; 11740 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 11741 assert(LPCI != LastprivateConditionalToTypes.end() && 11742 "Lastprivates must be registered already."); 11743 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 11744 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 11745 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 11746 for (const auto &Pair : It->DeclToUniqueName) { 11747 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 11748 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 11749 continue; 11750 auto I = LPCI->getSecond().find(Pair.first); 11751 assert(I != LPCI->getSecond().end() && 11752 "Lastprivate must be rehistered already."); 11753 // bool Cmp = priv_a.Fired != 0; 11754 LValue BaseLVal = std::get<3>(I->getSecond()); 11755 LValue FiredLVal = 11756 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 11757 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 11758 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 11759 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 11760 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 11761 // if (Cmp) { 11762 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 11763 CGF.EmitBlock(ThenBB); 
11764 Address Addr = CGF.GetAddrOfLocalVar(VD); 11765 LValue LVal; 11766 if (VD->getType()->isReferenceType()) 11767 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 11768 AlignmentSource::Decl); 11769 else 11770 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 11771 AlignmentSource::Decl); 11772 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 11773 D.getBeginLoc()); 11774 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 11775 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 11776 // } 11777 } 11778 } 11779 11780 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11781 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11782 SourceLocation Loc) { 11783 if (CGF.getLangOpts().OpenMP < 50) 11784 return; 11785 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 11786 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 11787 "Unknown lastprivate conditional variable."); 11788 StringRef UniqueName = It->second; 11789 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11790 // The variable was not updated in the region - exit. 
11791 if (!GV) 11792 return; 11793 LValue LPLVal = CGF.MakeAddrLValue( 11794 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 11795 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 11796 CGF.EmitStoreOfScalar(Res, PrivLVal); 11797 } 11798 11799 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11800 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11801 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11802 llvm_unreachable("Not supported in SIMD-only mode"); 11803 } 11804 11805 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11806 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11807 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11808 llvm_unreachable("Not supported in SIMD-only mode"); 11809 } 11810 11811 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11812 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11813 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11814 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11815 bool Tied, unsigned &NumberOfParts) { 11816 llvm_unreachable("Not supported in SIMD-only mode"); 11817 } 11818 11819 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11820 SourceLocation Loc, 11821 llvm::Function *OutlinedFn, 11822 ArrayRef<llvm::Value *> CapturedVars, 11823 const Expr *IfCond) { 11824 llvm_unreachable("Not supported in SIMD-only mode"); 11825 } 11826 11827 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11828 CodeGenFunction &CGF, StringRef CriticalName, 11829 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11830 const Expr *Hint) { 11831 llvm_unreachable("Not supported in SIMD-only mode"); 11832 } 11833 11834 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11835 const RegionCodeGenTy &MasterOpGen, 11836 SourceLocation Loc) { 11837 llvm_unreachable("Not supported in SIMD-only mode"); 11838 } 11839 11840 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11841 SourceLocation Loc) { 11842 llvm_unreachable("Not supported in SIMD-only mode"); 11843 } 11844 11845 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11846 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11847 SourceLocation Loc) { 11848 llvm_unreachable("Not supported in SIMD-only mode"); 11849 } 11850 11851 void CGOpenMPSIMDRuntime::emitSingleRegion( 11852 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11853 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11854 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11855 ArrayRef<const Expr *> AssignmentOps) { 11856 llvm_unreachable("Not supported in SIMD-only mode"); 11857 } 11858 11859 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11860 const RegionCodeGenTy &OrderedOpGen, 11861 SourceLocation Loc, 11862 bool IsThreads) { 11863 llvm_unreachable("Not supported in SIMD-only mode"); 11864 } 11865 11866 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11867 SourceLocation Loc, 11868 OpenMPDirectiveKind Kind, 11869 bool EmitChecks, 11870 bool ForceSimpleCall) { 11871 llvm_unreachable("Not supported in SIMD-only mode"); 11872 } 11873 11874 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11875 CodeGenFunction &CGF, SourceLocation Loc, 11876 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11877 bool Ordered, const DispatchRTInput &DispatchValues) { 11878 llvm_unreachable("Not supported in SIMD-only mode"); 11879 } 11880 11881 void CGOpenMPSIMDRuntime::emitForStaticInit( 11882 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11883 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11884 llvm_unreachable("Not supported in SIMD-only mode"); 11885 } 11886 11887 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11888 CodeGenFunction &CGF, SourceLocation Loc, 11889 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 11890 llvm_unreachable("Not supported in SIMD-only mode"); 11891 } 11892 11893 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11894 SourceLocation Loc, 11895 unsigned IVSize, 11896 bool IVSigned) { 11897 llvm_unreachable("Not supported in SIMD-only mode"); 11898 } 11899 11900 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11901 SourceLocation Loc, 11902 OpenMPDirectiveKind DKind) { 11903 llvm_unreachable("Not supported in SIMD-only mode"); 11904 } 11905 11906 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11907 SourceLocation Loc, 11908 unsigned IVSize, bool IVSigned, 11909 Address IL, Address LB, 11910 Address UB, Address ST) { 11911 llvm_unreachable("Not supported in SIMD-only mode"); 11912 } 11913 11914 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11915 llvm::Value *NumThreads, 11916 SourceLocation Loc) { 11917 llvm_unreachable("Not supported in SIMD-only mode"); 11918 } 11919 11920 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11921 ProcBindKind ProcBind, 11922 SourceLocation Loc) { 11923 llvm_unreachable("Not supported in SIMD-only mode"); 11924 } 11925 11926 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11927 const VarDecl *VD, 11928 Address VDAddr, 11929 SourceLocation Loc) { 11930 llvm_unreachable("Not supported in SIMD-only mode"); 11931 } 11932 11933 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11934 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11935 CodeGenFunction *CGF) { 11936 llvm_unreachable("Not supported in SIMD-only mode"); 11937 } 11938 11939 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11940 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11941 llvm_unreachable("Not supported in SIMD-only mode"); 11942 } 11943 11944 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11945 ArrayRef<const Expr *> 
Vars, 11946 SourceLocation Loc, 11947 llvm::AtomicOrdering AO) { 11948 llvm_unreachable("Not supported in SIMD-only mode"); 11949 } 11950 11951 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11952 const OMPExecutableDirective &D, 11953 llvm::Function *TaskFunction, 11954 QualType SharedsTy, Address Shareds, 11955 const Expr *IfCond, 11956 const OMPTaskDataTy &Data) { 11957 llvm_unreachable("Not supported in SIMD-only mode"); 11958 } 11959 11960 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11961 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11962 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11963 const Expr *IfCond, const OMPTaskDataTy &Data) { 11964 llvm_unreachable("Not supported in SIMD-only mode"); 11965 } 11966 11967 void CGOpenMPSIMDRuntime::emitReduction( 11968 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11969 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11970 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11971 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11972 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11973 ReductionOps, Options); 11974 } 11975 11976 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11977 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11978 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11979 llvm_unreachable("Not supported in SIMD-only mode"); 11980 } 11981 11982 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11983 SourceLocation Loc, 11984 ReductionCodeGen &RCG, 11985 unsigned N) { 11986 llvm_unreachable("Not supported in SIMD-only mode"); 11987 } 11988 11989 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11990 SourceLocation Loc, 11991 llvm::Value *ReductionsPtr, 11992 LValue SharedLVal) { 11993 llvm_unreachable("Not supported in 
SIMD-only mode"); 11994 } 11995 11996 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11997 SourceLocation Loc) { 11998 llvm_unreachable("Not supported in SIMD-only mode"); 11999 } 12000 12001 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 12002 CodeGenFunction &CGF, SourceLocation Loc, 12003 OpenMPDirectiveKind CancelRegion) { 12004 llvm_unreachable("Not supported in SIMD-only mode"); 12005 } 12006 12007 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 12008 SourceLocation Loc, const Expr *IfCond, 12009 OpenMPDirectiveKind CancelRegion) { 12010 llvm_unreachable("Not supported in SIMD-only mode"); 12011 } 12012 12013 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 12014 const OMPExecutableDirective &D, StringRef ParentName, 12015 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 12016 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 12017 llvm_unreachable("Not supported in SIMD-only mode"); 12018 } 12019 12020 void CGOpenMPSIMDRuntime::emitTargetCall( 12021 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12022 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 12023 const Expr *Device, 12024 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 12025 const OMPLoopDirective &D)> 12026 SizeEmitter) { 12027 llvm_unreachable("Not supported in SIMD-only mode"); 12028 } 12029 12030 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 12031 llvm_unreachable("Not supported in SIMD-only mode"); 12032 } 12033 12034 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 12035 llvm_unreachable("Not supported in SIMD-only mode"); 12036 } 12037 12038 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 12039 return false; 12040 } 12041 12042 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 12043 const OMPExecutableDirective &D, 12044 SourceLocation Loc, 12045 llvm::Function *OutlinedFn, 12046 ArrayRef<llvm::Value *> CapturedVars) { 12047 
llvm_unreachable("Not supported in SIMD-only mode"); 12048 } 12049 12050 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 12051 const Expr *NumTeams, 12052 const Expr *ThreadLimit, 12053 SourceLocation Loc) { 12054 llvm_unreachable("Not supported in SIMD-only mode"); 12055 } 12056 12057 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 12058 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12059 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 12060 llvm_unreachable("Not supported in SIMD-only mode"); 12061 } 12062 12063 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 12064 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 12065 const Expr *Device) { 12066 llvm_unreachable("Not supported in SIMD-only mode"); 12067 } 12068 12069 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 12070 const OMPLoopDirective &D, 12071 ArrayRef<Expr *> NumIterations) { 12072 llvm_unreachable("Not supported in SIMD-only mode"); 12073 } 12074 12075 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 12076 const OMPDependClause *C) { 12077 llvm_unreachable("Not supported in SIMD-only mode"); 12078 } 12079 12080 const VarDecl * 12081 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 12082 const VarDecl *NativeParam) const { 12083 llvm_unreachable("Not supported in SIMD-only mode"); 12084 } 12085 12086 Address 12087 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 12088 const VarDecl *NativeParam, 12089 const VarDecl *TargetParam) const { 12090 llvm_unreachable("Not supported in SIMD-only mode"); 12091 } 12092