//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Every instance is a captured-statement info of kind CR_OpenMP; the
/// concrete flavour of the region is recorded in a CGOpenMPRegionKind so that
/// derived classes can implement LLVM-style RTTI (classof) on top of it.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Create region info bound to the captured statement \p CS.
  /// \param CS Captured statement forwarded to the base class.
  /// \param RegionKind Kind of the region, reported by getRegionKind().
  /// \param CodeGen Code generation callback stored for this region.
  /// \param Kind OpenMP directive kind, reported by getDirectiveKind().
  /// \param HasCancel Flag reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Create region info that is not bound to a captured statement; only the
  /// CR_OpenMP kind is forwarded to the base class. The remaining parameters
  /// have the same meaning as in the constructor above.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  /// (Defined out of line, outside this part of the file.)
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting a switching point of an untied task. The default
  /// implementation does nothing; region kinds that support untied tasks
  /// override it.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// Kind of this region, as passed to the constructor.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// OpenMP directive kind, as passed to the constructor.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Value of the HasCancel flag passed to the constructor.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: any captured-statement info whose kind is
  /// CR_OpenMP is treated as a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of the region; derived classes compare against it in classof().
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback supplied at construction.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive kind supplied at construction.
  OpenMPDirectiveKind Kind;
  /// HasCancel flag supplied at construction.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 
146 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 llvm::Value *Res = 150 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 151 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 153 CGF.EmitBlock(DoneBB); 154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 156 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 157 CGF.Builder.GetInsertBlock()); 158 emitUntiedSwitch(CGF); 159 } 160 } 161 void emitUntiedSwitch(CodeGenFunction &CGF) const { 162 if (Untied) { 163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 167 PartIdLVal); 168 UntiedCodeGen(CGF); 169 CodeGenFunction::JumpDest CurPoint = 170 CGF.getJumpDestInCurrentScope(".untied.next."); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 CGF.Builder.GetInsertBlock()); 175 CGF.EmitBranchThroughCleanup(CurPoint); 176 CGF.EmitBlock(CurPoint.getBlock()); 177 } 178 } 179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 180 }; 181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 182 const VarDecl *ThreadIDVar, 183 const RegionCodeGenTy &CodeGen, 184 OpenMPDirectiveKind Kind, bool HasCancel, 185 const UntiedTaskActionTy &Action) 186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 187 ThreadIDVar(ThreadIDVar), Action(Action) { 188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 189 } 190 191 /// Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 
193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 194 195 /// Get an LValue for the current ThreadID variable. 196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 197 198 /// Get the name of the capture helper. 199 StringRef getHelperName() const override { return ".omp_outlined."; } 200 201 void emitUntiedSwitch(CodeGenFunction &CGF) override { 202 Action.emitUntiedSwitch(CGF); 203 } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 208 TaskOutlinedRegion; 209 } 210 211 private: 212 /// A variable or parameter storing global thread id for OpenMP 213 /// constructs. 214 const VarDecl *ThreadIDVar; 215 /// Action for emitting code for untied tasks. 216 const UntiedTaskActionTy &Action; 217 }; 218 219 /// API for inlined captured statement code generation in OpenMP 220 /// constructs. 221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 222 public: 223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 224 const RegionCodeGenTy &CodeGen, 225 OpenMPDirectiveKind Kind, bool HasCancel) 226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 227 OldCSI(OldCSI), 228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 229 230 // Retrieve the value of the context parameter. 231 llvm::Value *getContextValue() const override { 232 if (OuterRegionInfo) 233 return OuterRegionInfo->getContextValue(); 234 llvm_unreachable("No context value for inlined OpenMP region"); 235 } 236 237 void setContextValue(llvm::Value *V) override { 238 if (OuterRegionInfo) { 239 OuterRegionInfo->setContextValue(V); 240 return; 241 } 242 llvm_unreachable("No context value for inlined OpenMP region"); 243 } 244 245 /// Lookup the captured field decl for a variable. 
246 const FieldDecl *lookup(const VarDecl *VD) const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->lookup(VD); 249 // If there is no outer outlined region,no need to lookup in a list of 250 // captured variables, we can use the original one. 251 return nullptr; 252 } 253 254 FieldDecl *getThisFieldDecl() const override { 255 if (OuterRegionInfo) 256 return OuterRegionInfo->getThisFieldDecl(); 257 return nullptr; 258 } 259 260 /// Get a variable or parameter for storing global thread id 261 /// inside OpenMP construct. 262 const VarDecl *getThreadIDVariable() const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->getThreadIDVariable(); 265 return nullptr; 266 } 267 268 /// Get an LValue for the current ThreadID variable. 269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 272 llvm_unreachable("No LValue for inlined OpenMP construct"); 273 } 274 275 /// Get the name of the capture helper. 276 StringRef getHelperName() const override { 277 if (auto *OuterRegionInfo = getOldCSI()) 278 return OuterRegionInfo->getHelperName(); 279 llvm_unreachable("No helper name for inlined OpenMP construct"); 280 } 281 282 void emitUntiedSwitch(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 OuterRegionInfo->emitUntiedSwitch(CGF); 285 } 286 287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 288 289 static bool classof(const CGCapturedStmtInfo *Info) { 290 return CGOpenMPRegionInfo::classof(Info) && 291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 292 } 293 294 ~CGOpenMPInlinedRegionInfo() override = default; 295 296 private: 297 /// CodeGen info about outer OpenMP region. 298 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 299 CGOpenMPRegionInfo *OuterRegionInfo; 300 }; 301 302 /// API for captured statement code generation in OpenMP target 303 /// constructs. 
For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 
346 for (const auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate( 359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 360 } 361 (void)PrivScope.Privatize(); 362 } 363 364 /// Lookup the captured field decl for a variable. 365 const FieldDecl *lookup(const VarDecl *VD) const override { 366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 367 return FD; 368 return nullptr; 369 } 370 371 /// Emit the captured statement body. 372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 373 llvm_unreachable("No body for expressions"); 374 } 375 376 /// Get a variable or parameter for storing global thread id 377 /// inside OpenMP construct. 378 const VarDecl *getThreadIDVariable() const override { 379 llvm_unreachable("No thread id for expressions"); 380 } 381 382 /// Get the name of the capture helper. 383 StringRef getHelperName() const override { 384 llvm_unreachable("No helper name for expressions"); 385 } 386 387 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 388 389 private: 390 /// Private scope to capture global variables. 391 CodeGenFunction::OMPPrivateScope PrivScope; 392 }; 393 394 /// RAII for emitting code of OpenMP constructs. 395 class InlinedOpenMPRegionRAII { 396 CodeGenFunction &CGF; 397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 398 FieldDecl *LambdaThisCaptureField = nullptr; 399 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 400 401 public: 402 /// Constructs region for combined constructs. 403 /// \param CodeGen Code generation sequence for combined directives. 
Includes 404 /// a list of functions used for code generation of implicitly inlined 405 /// regions. 406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 407 OpenMPDirectiveKind Kind, bool HasCancel) 408 : CGF(CGF) { 409 // Start emission for the construct. 410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 414 CGF.LambdaThisCaptureField = nullptr; 415 BlockInfo = CGF.BlockInfo; 416 CGF.BlockInfo = nullptr; 417 } 418 419 ~InlinedOpenMPRegionRAII() { 420 // Restore original CapturedStmtInfo only if we're done with code emission. 421 auto *OldCSI = 422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 423 delete CGF.CapturedStmtInfo; 424 CGF.CapturedStmtInfo = OldCSI; 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 427 CGF.BlockInfo = BlockInfo; 428 } 429 }; 430 431 /// Values for bit flags used in the ident_t to describe the fields. 432 /// All enumeric elements are named and described in accordance with the code 433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 434 enum OpenMPLocationFlags : unsigned { 435 /// Use trampoline for internal microtask. 436 OMP_IDENT_IMD = 0x01, 437 /// Use c-style ident structure. 438 OMP_IDENT_KMPC = 0x02, 439 /// Atomic reduction option for kmpc_reduce. 440 OMP_ATOMIC_REDUCE = 0x10, 441 /// Explicit 'barrier' directive. 442 OMP_IDENT_BARRIER_EXPL = 0x20, 443 /// Implicit barrier in code. 444 OMP_IDENT_BARRIER_IMPL = 0x40, 445 /// Implicit barrier in 'for' directive. 446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 447 /// Implicit barrier in 'sections' directive. 448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 449 /// Implicit barrier in 'single' directive. 
450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 451 /// Call of __kmp_for_static_init for static loop. 452 OMP_IDENT_WORK_LOOP = 0x200, 453 /// Call of __kmp_for_static_init for sections. 454 OMP_IDENT_WORK_SECTIONS = 0x400, 455 /// Call of __kmp_for_static_init for distribute. 456 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 458 }; 459 460 namespace { 461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 462 /// Values for bit flags for marking which requires clauses have been used. 463 enum OpenMPOffloadingRequiresDirFlags : int64_t { 464 /// flag undefined. 465 OMP_REQ_UNDEFINED = 0x000, 466 /// no requires clause present. 467 OMP_REQ_NONE = 0x001, 468 /// reverse_offload clause. 469 OMP_REQ_REVERSE_OFFLOAD = 0x002, 470 /// unified_address clause. 471 OMP_REQ_UNIFIED_ADDRESS = 0x004, 472 /// unified_shared_memory clause. 473 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 474 /// dynamic_allocators clause. 475 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 476 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 477 }; 478 479 enum OpenMPOffloadingReservedDeviceIDs { 480 /// Device ID if the device was not defined, runtime should get it 481 /// from environment variables in the spec. 482 OMP_DEVICEID_UNDEF = -1, 483 }; 484 } // anonymous namespace 485 486 /// Describes ident structure that describes a source location. 
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below are listed in the same order as the fields of the
/// structure quoted above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule: alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers of the runtime library entry points referenced by this file.
/// The comment on each enumerator quotes the C prototype of the corresponding
/// runtime function.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  /// kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  /// size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  /// kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  /// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,
  /// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  /// *data);
  OMPRTL__kmpc_task_reduction_init,
  /// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  /// *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  /// Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  /// Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  /// Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  /// size);
  OMPRTL__kmpc_push_target_tripcount,
  /// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_nowait,
  /// Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  /// *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  /// Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  /// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  /// Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  /// NOTE(review): every sibling data-mapping entry documents arg_sizes as
  /// int64_t *; the size_t * here looks stale - confirm against the runtime.
  OMPRTL__tgt_target_data_end,
  /// Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  /// Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  /// Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  /// Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  /// Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  /// *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  // Action whose Exit() hook is invoked when this cleanup is emitted.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  /// Emit the post-action, unless there is no insert point left to emit into.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Run the region callback inside its own cleanups scope.
/// If a pre/post action is attached, a CleanupTy wrapping it is pushed as a
/// NormalAndEHCleanup before the callback runs; otherwise the callback gets a
/// default-constructed PrePostActionTy.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if \p ReductionOp is a call whose callee is an opaque value wrapping
/// a reference to a user-defined reduction (UDR) declaration and, if so,
/// return that declaration.
/// \return The referenced OMPDeclareReductionDecl, or nullptr if the
/// expression does not have that shape.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Initialize the private copy of a reduction item that uses the
/// user-defined reduction \p DRD.
/// \param InitOp Call expression with an opaque callee; only used when \p DRD
/// has an initializer.
/// \param Private Address of the private copy being initialized.
/// \param Original Address of the original (shared) item.
/// \param Ty Type of the item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Reduction.second is the initializer function (see
    // emitUserDefinedReduction, which stores {Combiner, Initializer}).
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // Both call arguments are expected to be unary operators applied to
    // variable references; the casts below assert that shape.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the first argument's variable to the private copy and the second
    // one to the original item for the duration of the call emission.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the outlined initializer function and emit the
    // call.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer clause: copy the null constant of the type into the
    // private copy via a private constant global.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Route the loaded value through a temporary opaque expression so the
    // generic expression-to-memory emission can be reused.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, each element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is emitted into each
/// element.
/// \param Init Initial expression of array.
/// \param DRD User-defined reduction declaration, or nullptr. The source
/// array is only traversed when this is non-null.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for an empty array.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source (only with a UDR) and destination
  // elements; the back-edge values are added after the body is emitted.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit the initialization of the current element in its own cleanups scope.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the source address forward by one element.
    // NOTE(review): the IR value name below says "dest" although it is the
    // source pointer; it is only a value name.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the destination address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue of the shared reduction item \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit the upper-bound lvalue of \p E if it is an array section.
/// \return The lvalue emitted with IsLowerBound=false, or a
/// default-constructed LValue for any other expression kind.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Emit element-wise initialization of the array-typed private copy of
/// reduction item \p N.
/// The UDR-based initializer is used when \p DRD is set and either has an
/// initializer or the private variable has no init of its own; otherwise the
/// private variable's init expression is emitted for each element.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

/// Build the per-item clause data by pairing each shared expression with the
/// private expression and reduction operation at the same position.
/// NOTE(review): \p Privates and \p ReductionOps are iterated in lockstep with
/// \p Shareds without a size check; callers presumably pass arrays of equal
/// length.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit and record the lower-bound/upper-bound lvalues of reduction item
/// \p N. Items must be processed in order: exactly N lvalue pairs must have
/// been recorded before this call.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

/// Compute and record the size of reduction item \p N in Sizes.
/// For non-variably-modified private types only the size in chars is stored
/// (the element count is nullptr). Otherwise both the size in chars and the
/// element count are computed, the element count is bound to the private
/// type's VLA size expression and the variably modified type is emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count is (upper bound - lower bound) + 1; the byte size is the
    // count multiplied by the element size.
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole variable: derive the element count from the total byte size.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit the variably modified private type of reduction item \p N using an
/// already known element count \p Size.
/// For non-variably-modified types nothing is emitted and \p Size must be
/// nullptr.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the private copy of reduction item \p N.
/// \param PrivateAddr Address of the private copy.
/// \param SharedLVal LValue of the original item.
/// \param DefaultInit Callback tried only when neither the array path nor the
/// UDR initializer path applies; if it returns false and the private variable
/// has a non-trivial initializer, that initializer is emitted instead.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Re-type both addresses to the memory representation of their AST types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// \return true if the private type of reduction item \p N has a destruction
/// kind other than DK_none.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destroy cleanup for the private copy of reduction item \p N if its
/// type requires destruction; otherwise do nothing.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Starting from \p BaseLV of type \p BaseTy, load through each pointer or
/// reference level until the type matches \p ElTy (or is no longer a
/// pointer/reference).
/// \return An lvalue whose address is element-bitcast to the memory type of
/// \p ElTy.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Wrap \p Addr in as many levels of indirection as \p BaseTy has on top of
/// \p ElTy.
/// One temporary is created per pointer/reference level, each outer temporary
/// stores the pointer to the next inner one, and the innermost stores the
/// cast \p Addr.
/// \return The outermost temporary, or, if no level was needed, \p Addr cast
/// to \p BaseLVType with alignment \p BaseLVAlignment.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Find the base variable of an array section or array subscript expression.
/// Nested array sections and subscripts are stripped until the underlying
/// variable reference is reached.
/// \param [out] DE Set to the base DeclRefExpr when one is found; left
/// untouched otherwise.
/// \return The base variable, or nullptr if \p Ref is neither an array
/// section nor an array subscript.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// Adjust the private address of reduction item \p N so that it can stand in
/// for the base variable of the item, and record that base variable in
/// BaseDecls.
/// For array sections/subscripts the private pointer is offset by the
/// distance between the beginning of the base and the shared item and then
/// wrapped via castToBase. For plain variable references \p PrivateAddr is
/// returned unchanged.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// \return true if reduction item \p N uses a user-defined reduction that has
/// an initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

/// Default implementation: the thread id variable has pointer type, so load
/// the pointer from the local variable and return the pointee lvalue.
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

/// Emit the region body via the stored CodeGen callback, bracketed by a
/// terminate scope on the EH stack. Does nothing without an insert point.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

/// Task regions use the thread id local variable directly (no pointer load),
/// so return an lvalue for the variable itself.
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Create an unnamed, public, non-mutable field of type \p FieldTy (no
/// bit-width, no in-class initializer) and add it to \p DC.
/// \return The created field.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

/// Build the implicit "ident_t" record (four 32-bit integer fields followed
/// by a void pointer), the critical-name array type (8 x i32) and load the
/// offload info metadata.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

/// Drop internal variables and erase recorded non-target global variables
/// that are still alive but are mere declarations without any uses.
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Keep definitions and anything that is still referenced.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Build a runtime name from \p Parts: the first part is prefixed with
/// FirstSeparator and every following part with Separator.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str();
}

/// Emit an internal function implementing the combiner or the initializer of
/// a user-defined reduction.
/// \param Ty Type of the reduction item.
/// \param CombinerInitializer Expression to emit as the function body; may be
/// nullptr, in which case no expression is emitted.
/// \param In Variable mapped to the pointee of the second parameter.
/// \param Out Variable mapped to the pointee of the first parameter.
/// \param IsCombiner Selects the function name ("omp_combiner" vs.
/// "omp_initializer"); for initializers a non-trivial init of \p Out is
/// emitted before \p CombinerInitializer.
/// \return The emitted function.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *restrict omp_out_parm, Ty *restrict omp_in_parm);
  // (the out parameter is pushed first, the in parameter second).
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, mark the helper always-inline instead of
  // noinline/optnone.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers, emit the non-trivial init of the "out" variable into
  // its (remapped) storage first.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner and, if the declaration has an initializer, the
/// initializer function for the user-defined reduction \p D and cache them in
/// UDRMap. Does nothing if \p D was already emitted.
/// \param CGF If non-null, \p D is also recorded for CGF's current function in
/// FunctionUDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only call-style initializers are passed as an expression to emit; for
    // other kinds nullptr is passed and only the init of the private variable
    // (handled inside emitCombinerOrInitializer) is emitted.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// \return The {combiner, initializer} function pair for \p D, emitting the
/// functions on first use. The initializer is nullptr when \p D has no
/// initializer.
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

/// Emit the outlined function for the captured statement \p CS of a
/// 'parallel' or 'teams' region of directive \p D.
/// The HasCancel flag of the region info is taken from the directive when it
/// is one of the parallel-based directive classes checked below and is false
/// otherwise.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

/// Emit the outlined function for the 'parallel' captured region of \p D.
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Emit the outlined function for the 'teams' captured region of \p D.
llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

/// Emit the outlined function for a 'task' or taskloop-based directive.
/// \param ThreadIDVar Thread id variable; must not have pointer type here.
/// \param PartIDVar Part id variable handed to the untied-task action.
/// \param TaskTVar Variable holding the pointer to the task object.
/// \param Tied Whether the task is tied.
/// \param [out] NumberOfParts Written only for untied tasks, with the number
/// of parts reported by the untied-task action.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen used by the untied-task action: emits a call to the
  // OMPRTL__kmpc_omp_task runtime entry with (loc, thread id, task pointer).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer()};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only plain 'task' directives can carry a cancel flag here.
  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind,
                                        TD ? TD->hasCancel() : false, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

/// Add the values in \p Data to \p Fields in the field order of \p RD,
/// inserting null values for any LLVM struct elements that lie between two
/// consecutive fields.
/// NOTE(review): \p Data is consumed one entry per field without a bounds
/// check; callers presumably pass exactly one constant per field.
/// NOTE(review): the local CIBuilder is not referenced in this function.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
                             const RecordDecl *RD, const CGRecordLayout &RL,
                             ArrayRef<llvm::Constant *> Data) {
  llvm::StructType *StructTy = RL.getLLVMType();
  unsigned PrevIdx = 0;
  ConstantInitBuilder CIBuilder(CGM);
  auto DI = Data.begin();
  for (const FieldDecl *FD : RD->fields()) {
    unsigned Idx = RL.getLLVMFieldNo(FD);
    // Fill the alignment.
    for (unsigned I = PrevIdx; I < Idx; ++I)
      Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
    PrevIdx = Idx + 1;
    Fields.add(*DI);
    ++DI;
  }
}

/// Create a global variable of record type \p Ty initialized with \p Data.
/// \param IsConstant Whether the global is marked constant.
/// \param Name Name of the global.
/// \param Args Extra arguments forwarded to finishAndCreateGlobal.
template <class... As>
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Build a constant struct of record type \p Ty from \p Data and append it to
/// the aggregate builder \p Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Return the address of the default ident_t global for \p Flags, creating
/// it on first use. Entries are cached per (Flags, reserved_2 flags) pair.
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order: reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Create the "svcpt" marker instruction (a bitcast of an undef i32) that
/// serves as the insertion point for per-function service code (location
/// setup and the thread id call).
/// \param AtCurrentPoint If true, the marker is created in the current insert
/// block; otherwise it is inserted right after the alloca insert point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Erase the "svcpt" marker instruction of the current function, if any.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    // Reset the cached pointer before erasing the instruction.
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Emit (or reuse) an ident_t location object describing \p Loc.
/// Without debug info, or for an invalid \p Loc, the shared default location
/// for \p Flags is returned. Otherwise a per-function ident_t temporary is
/// used: it is initialized from the default location at the service insert
/// point and its psource field is updated with a string describing \p Loc.
/// \return Pointer to the ident_t object.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    // Emit the copy from the default location at the service insert point;
    // the guard restores the builder's insert point afterwards.
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location string is cached per raw source location encoding.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

/// Return the global thread id value for the current function.
/// A value cached for the function is reused if present. Otherwise the id is
/// loaded from the region's thread id variable when that is available and
/// considered safe (see the condition below), or obtained by a call to the
/// OMPRTL__kmpc_global_thread_num runtime entry emitted at the service insert
/// point and cached for the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this is an outlined function with thread id passed as
        // argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call
  // kmp_int32 __kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

/// Drop all per-function state kept for CGF's current function: the
/// location/thread-id cache (including its service insert point marker) and
/// the user-defined reductions and mappers recorded for the function.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for(auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for(auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
}

/// \return The LLVM pointer type to ident_t.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1709 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1710 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1711 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1712 } 1713 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1714 } 1715 1716 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1717 llvm::FunctionCallee RTLFn = nullptr; 1718 switch (static_cast<OpenMPRTLFunction>(Function)) { 1719 case OMPRTL__kmpc_fork_call: { 1720 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1721 // microtask, ...); 1722 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1723 getKmpc_MicroPointerTy()}; 1724 auto *FnTy = 1725 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1726 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1727 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1728 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1729 llvm::LLVMContext &Ctx = F->getContext(); 1730 llvm::MDBuilder MDB(Ctx); 1731 // Annotate the callback behavior of the __kmpc_fork_call: 1732 // - The callback callee is argument number 2 (microtask). 1733 // - The first two arguments of the callback callee are unknown (-1). 1734 // - All variadic arguments to the __kmpc_fork_call are passed to the 1735 // callback callee. 
1736 F->addMetadata( 1737 llvm::LLVMContext::MD_callback, 1738 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1739 2, {-1, -1}, 1740 /* VarArgsArePassed */ true)})); 1741 } 1742 } 1743 break; 1744 } 1745 case OMPRTL__kmpc_global_thread_num: { 1746 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1747 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1748 auto *FnTy = 1749 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1750 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1751 break; 1752 } 1753 case OMPRTL__kmpc_threadprivate_cached: { 1754 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1755 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1756 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1757 CGM.VoidPtrTy, CGM.SizeTy, 1758 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1759 auto *FnTy = 1760 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1761 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1762 break; 1763 } 1764 case OMPRTL__kmpc_critical: { 1765 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1766 // kmp_critical_name *crit); 1767 llvm::Type *TypeParams[] = { 1768 getIdentTyPointerTy(), CGM.Int32Ty, 1769 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1770 auto *FnTy = 1771 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1772 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1773 break; 1774 } 1775 case OMPRTL__kmpc_critical_with_hint: { 1776 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1777 // kmp_critical_name *crit, uintptr_t hint); 1778 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1779 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1780 CGM.IntPtrTy}; 1781 auto *FnTy = 1782 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1783 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1784 break; 1785 } 1786 case OMPRTL__kmpc_threadprivate_register: { 1787 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1788 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1789 // typedef void *(*kmpc_ctor)(void *); 1790 auto *KmpcCtorTy = 1791 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1792 /*isVarArg*/ false)->getPointerTo(); 1793 // typedef void *(*kmpc_cctor)(void *, void *); 1794 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1795 auto *KmpcCopyCtorTy = 1796 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1797 /*isVarArg*/ false) 1798 ->getPointerTo(); 1799 // typedef void (*kmpc_dtor)(void *); 1800 auto *KmpcDtorTy = 1801 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1802 ->getPointerTo(); 1803 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1804 KmpcCopyCtorTy, KmpcDtorTy}; 1805 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1806 /*isVarArg*/ false); 1807 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1808 break; 1809 } 1810 case OMPRTL__kmpc_end_critical: { 1811 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1812 // kmp_critical_name *crit); 1813 llvm::Type *TypeParams[] = { 1814 getIdentTyPointerTy(), CGM.Int32Ty, 1815 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1816 auto *FnTy = 1817 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1818 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1819 break; 1820 } 1821 case OMPRTL__kmpc_cancel_barrier: { 1822 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1823 // global_tid); 1824 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1825 auto *FnTy = 1826 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1827 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1828 break; 1829 } 1830 case 
OMPRTL__kmpc_barrier: { 1831 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1832 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1833 auto *FnTy = 1834 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1835 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1836 break; 1837 } 1838 case OMPRTL__kmpc_for_static_fini: { 1839 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1840 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1841 auto *FnTy = 1842 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1843 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1844 break; 1845 } 1846 case OMPRTL__kmpc_push_num_threads: { 1847 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1848 // kmp_int32 num_threads) 1849 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1850 CGM.Int32Ty}; 1851 auto *FnTy = 1852 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1853 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1854 break; 1855 } 1856 case OMPRTL__kmpc_serialized_parallel: { 1857 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1858 // global_tid); 1859 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1860 auto *FnTy = 1861 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1862 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1863 break; 1864 } 1865 case OMPRTL__kmpc_end_serialized_parallel: { 1866 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1867 // global_tid); 1868 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1869 auto *FnTy = 1870 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1871 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1872 break; 1873 } 1874 case OMPRTL__kmpc_flush: { 1875 // Build void 
__kmpc_flush(ident_t *loc); 1876 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1877 auto *FnTy = 1878 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1879 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1880 break; 1881 } 1882 case OMPRTL__kmpc_master: { 1883 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1884 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1885 auto *FnTy = 1886 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1887 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1888 break; 1889 } 1890 case OMPRTL__kmpc_end_master: { 1891 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1892 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1893 auto *FnTy = 1894 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1895 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1896 break; 1897 } 1898 case OMPRTL__kmpc_omp_taskyield: { 1899 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1900 // int end_part); 1901 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1902 auto *FnTy = 1903 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1904 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1905 break; 1906 } 1907 case OMPRTL__kmpc_single: { 1908 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1909 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1910 auto *FnTy = 1911 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1912 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_end_single: { 1916 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1917 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1918 auto *FnTy = 1919 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1920 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1921 break; 1922 } 1923 case OMPRTL__kmpc_omp_task_alloc: { 1924 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1925 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1926 // kmp_routine_entry_t *task_entry); 1927 assert(KmpRoutineEntryPtrTy != nullptr && 1928 "Type kmp_routine_entry_t must be created."); 1929 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1930 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1931 // Return void * and then cast to particular kmp_task_t type. 1932 auto *FnTy = 1933 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1934 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1935 break; 1936 } 1937 case OMPRTL__kmpc_omp_target_task_alloc: { 1938 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 1939 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1940 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 1941 assert(KmpRoutineEntryPtrTy != nullptr && 1942 "Type kmp_routine_entry_t must be created."); 1943 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1944 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 1945 CGM.Int64Ty}; 1946 // Return void * and then cast to particular kmp_task_t type. 
1947 auto *FnTy = 1948 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1949 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 1950 break; 1951 } 1952 case OMPRTL__kmpc_omp_task: { 1953 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1954 // *new_task); 1955 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1956 CGM.VoidPtrTy}; 1957 auto *FnTy = 1958 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1959 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1960 break; 1961 } 1962 case OMPRTL__kmpc_copyprivate: { 1963 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1964 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1965 // kmp_int32 didit); 1966 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1967 auto *CpyFnTy = 1968 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1969 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1970 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1971 CGM.Int32Ty}; 1972 auto *FnTy = 1973 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1974 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1975 break; 1976 } 1977 case OMPRTL__kmpc_reduce: { 1978 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1979 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1980 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1981 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1982 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1983 /*isVarArg=*/false); 1984 llvm::Type *TypeParams[] = { 1985 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1986 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1987 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1988 auto *FnTy = 1989 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1990 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1991 break; 1992 } 1993 case OMPRTL__kmpc_reduce_nowait: { 1994 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1995 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1996 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1997 // *lck); 1998 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1999 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2000 /*isVarArg=*/false); 2001 llvm::Type *TypeParams[] = { 2002 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2003 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2004 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2005 auto *FnTy = 2006 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2007 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2008 break; 2009 } 2010 case OMPRTL__kmpc_end_reduce: { 2011 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2012 // kmp_critical_name *lck); 2013 llvm::Type *TypeParams[] = { 2014 getIdentTyPointerTy(), CGM.Int32Ty, 2015 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2016 auto *FnTy = 2017 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2018 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2019 break; 2020 } 2021 case OMPRTL__kmpc_end_reduce_nowait: { 2022 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2023 // kmp_critical_name *lck); 2024 llvm::Type *TypeParams[] = { 2025 getIdentTyPointerTy(), CGM.Int32Ty, 2026 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2027 auto *FnTy = 2028 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2029 RTLFn = 2030 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2031 break; 2032 } 2033 case OMPRTL__kmpc_omp_task_begin_if0: { 
2034 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2035 // *new_task); 2036 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2037 CGM.VoidPtrTy}; 2038 auto *FnTy = 2039 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2040 RTLFn = 2041 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2042 break; 2043 } 2044 case OMPRTL__kmpc_omp_task_complete_if0: { 2045 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2046 // *new_task); 2047 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2048 CGM.VoidPtrTy}; 2049 auto *FnTy = 2050 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2051 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2052 /*Name=*/"__kmpc_omp_task_complete_if0"); 2053 break; 2054 } 2055 case OMPRTL__kmpc_ordered: { 2056 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2057 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2058 auto *FnTy = 2059 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2060 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2061 break; 2062 } 2063 case OMPRTL__kmpc_end_ordered: { 2064 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2065 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2066 auto *FnTy = 2067 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2068 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2069 break; 2070 } 2071 case OMPRTL__kmpc_omp_taskwait: { 2072 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2073 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2074 auto *FnTy = 2075 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2076 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2077 break; 2078 } 2079 case OMPRTL__kmpc_taskgroup: { 2080 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2081 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2082 auto *FnTy = 2083 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2084 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2085 break; 2086 } 2087 case OMPRTL__kmpc_end_taskgroup: { 2088 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2089 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2090 auto *FnTy = 2091 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2092 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2093 break; 2094 } 2095 case OMPRTL__kmpc_push_proc_bind: { 2096 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2097 // int proc_bind) 2098 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2099 auto *FnTy = 2100 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2101 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2102 break; 2103 } 2104 case OMPRTL__kmpc_omp_task_with_deps: { 2105 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2106 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2107 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2108 llvm::Type *TypeParams[] = { 2109 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2110 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2111 auto *FnTy = 2112 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2113 RTLFn = 2114 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2115 break; 2116 } 2117 case OMPRTL__kmpc_omp_wait_deps: { 2118 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2119 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2120 // kmp_depend_info_t *noalias_dep_list); 2121 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2122 CGM.Int32Ty, CGM.VoidPtrTy, 2123 
CGM.Int32Ty, CGM.VoidPtrTy}; 2124 auto *FnTy = 2125 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2126 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2127 break; 2128 } 2129 case OMPRTL__kmpc_cancellationpoint: { 2130 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2131 // global_tid, kmp_int32 cncl_kind) 2132 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2133 auto *FnTy = 2134 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2135 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2136 break; 2137 } 2138 case OMPRTL__kmpc_cancel: { 2139 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2140 // kmp_int32 cncl_kind) 2141 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2142 auto *FnTy = 2143 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2144 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2145 break; 2146 } 2147 case OMPRTL__kmpc_push_num_teams: { 2148 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2149 // kmp_int32 num_teams, kmp_int32 num_threads) 2150 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2151 CGM.Int32Ty}; 2152 auto *FnTy = 2153 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2154 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2155 break; 2156 } 2157 case OMPRTL__kmpc_fork_teams: { 2158 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2159 // microtask, ...); 2160 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2161 getKmpc_MicroPointerTy()}; 2162 auto *FnTy = 2163 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2164 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2165 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2166 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2167 llvm::LLVMContext &Ctx = F->getContext(); 2168 llvm::MDBuilder MDB(Ctx); 2169 // Annotate the callback behavior of the __kmpc_fork_teams: 2170 // - The callback callee is argument number 2 (microtask). 2171 // - The first two arguments of the callback callee are unknown (-1). 2172 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2173 // callback callee. 2174 F->addMetadata( 2175 llvm::LLVMContext::MD_callback, 2176 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2177 2, {-1, -1}, 2178 /* VarArgsArePassed */ true)})); 2179 } 2180 } 2181 break; 2182 } 2183 case OMPRTL__kmpc_taskloop: { 2184 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2185 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2186 // sched, kmp_uint64 grainsize, void *task_dup); 2187 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2188 CGM.IntTy, 2189 CGM.VoidPtrTy, 2190 CGM.IntTy, 2191 CGM.Int64Ty->getPointerTo(), 2192 CGM.Int64Ty->getPointerTo(), 2193 CGM.Int64Ty, 2194 CGM.IntTy, 2195 CGM.IntTy, 2196 CGM.Int64Ty, 2197 CGM.VoidPtrTy}; 2198 auto *FnTy = 2199 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2200 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2201 break; 2202 } 2203 case OMPRTL__kmpc_doacross_init: { 2204 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2205 // num_dims, struct kmp_dim *dims); 2206 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2207 CGM.Int32Ty, 2208 CGM.Int32Ty, 2209 CGM.VoidPtrTy}; 2210 auto *FnTy = 2211 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2212 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2213 break; 2214 } 2215 case OMPRTL__kmpc_doacross_fini: { 2216 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2217 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2218 auto *FnTy 
= 2219 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2220 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2221 break; 2222 } 2223 case OMPRTL__kmpc_doacross_post: { 2224 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2225 // *vec); 2226 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2227 CGM.Int64Ty->getPointerTo()}; 2228 auto *FnTy = 2229 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2230 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2231 break; 2232 } 2233 case OMPRTL__kmpc_doacross_wait: { 2234 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2235 // *vec); 2236 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2237 CGM.Int64Ty->getPointerTo()}; 2238 auto *FnTy = 2239 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2240 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2241 break; 2242 } 2243 case OMPRTL__kmpc_task_reduction_init: { 2244 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2245 // *data); 2246 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2247 auto *FnTy = 2248 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2249 RTLFn = 2250 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2251 break; 2252 } 2253 case OMPRTL__kmpc_task_reduction_get_th_data: { 2254 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2255 // *d); 2256 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2257 auto *FnTy = 2258 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2259 RTLFn = CGM.CreateRuntimeFunction( 2260 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2261 break; 2262 } 2263 case OMPRTL__kmpc_alloc: { 2264 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2265 // 
al); omp_allocator_handle_t type is void *. 2266 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2267 auto *FnTy = 2268 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2269 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2270 break; 2271 } 2272 case OMPRTL__kmpc_free: { 2273 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2274 // al); omp_allocator_handle_t type is void *. 2275 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2276 auto *FnTy = 2277 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2278 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2279 break; 2280 } 2281 case OMPRTL__kmpc_push_target_tripcount: { 2282 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2283 // size); 2284 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2285 llvm::FunctionType *FnTy = 2286 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2287 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2288 break; 2289 } 2290 case OMPRTL__tgt_target: { 2291 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2292 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2293 // *arg_types); 2294 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2295 CGM.VoidPtrTy, 2296 CGM.Int32Ty, 2297 CGM.VoidPtrPtrTy, 2298 CGM.VoidPtrPtrTy, 2299 CGM.Int64Ty->getPointerTo(), 2300 CGM.Int64Ty->getPointerTo()}; 2301 auto *FnTy = 2302 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2303 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2304 break; 2305 } 2306 case OMPRTL__tgt_target_nowait: { 2307 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2308 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2309 // int64_t *arg_types); 2310 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2311 CGM.VoidPtrTy, 2312 
CGM.Int32Ty, 2313 CGM.VoidPtrPtrTy, 2314 CGM.VoidPtrPtrTy, 2315 CGM.Int64Ty->getPointerTo(), 2316 CGM.Int64Ty->getPointerTo()}; 2317 auto *FnTy = 2318 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2319 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2320 break; 2321 } 2322 case OMPRTL__tgt_target_teams: { 2323 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2324 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2325 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2326 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2327 CGM.VoidPtrTy, 2328 CGM.Int32Ty, 2329 CGM.VoidPtrPtrTy, 2330 CGM.VoidPtrPtrTy, 2331 CGM.Int64Ty->getPointerTo(), 2332 CGM.Int64Ty->getPointerTo(), 2333 CGM.Int32Ty, 2334 CGM.Int32Ty}; 2335 auto *FnTy = 2336 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2337 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2338 break; 2339 } 2340 case OMPRTL__tgt_target_teams_nowait: { 2341 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2342 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2343 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2344 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2345 CGM.VoidPtrTy, 2346 CGM.Int32Ty, 2347 CGM.VoidPtrPtrTy, 2348 CGM.VoidPtrPtrTy, 2349 CGM.Int64Ty->getPointerTo(), 2350 CGM.Int64Ty->getPointerTo(), 2351 CGM.Int32Ty, 2352 CGM.Int32Ty}; 2353 auto *FnTy = 2354 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2355 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2356 break; 2357 } 2358 case OMPRTL__tgt_register_requires: { 2359 // Build void __tgt_register_requires(int64_t flags); 2360 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2361 auto *FnTy = 2362 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2363 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2364 
break; 2365 } 2366 case OMPRTL__tgt_register_lib: { 2367 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2368 QualType ParamTy = 2369 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2370 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2371 auto *FnTy = 2372 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2373 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2374 break; 2375 } 2376 case OMPRTL__tgt_unregister_lib: { 2377 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2378 QualType ParamTy = 2379 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2380 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2381 auto *FnTy = 2382 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2383 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2384 break; 2385 } 2386 case OMPRTL__tgt_target_data_begin: { 2387 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2388 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2389 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2390 CGM.Int32Ty, 2391 CGM.VoidPtrPtrTy, 2392 CGM.VoidPtrPtrTy, 2393 CGM.Int64Ty->getPointerTo(), 2394 CGM.Int64Ty->getPointerTo()}; 2395 auto *FnTy = 2396 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2397 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2398 break; 2399 } 2400 case OMPRTL__tgt_target_data_begin_nowait: { 2401 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2402 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2403 // *arg_types); 2404 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2405 CGM.Int32Ty, 2406 CGM.VoidPtrPtrTy, 2407 CGM.VoidPtrPtrTy, 2408 CGM.Int64Ty->getPointerTo(), 2409 CGM.Int64Ty->getPointerTo()}; 2410 auto *FnTy = 2411 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2412 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2413 break; 2414 } 2415 case OMPRTL__tgt_target_data_end: { 2416 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2417 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2418 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2419 CGM.Int32Ty, 2420 CGM.VoidPtrPtrTy, 2421 CGM.VoidPtrPtrTy, 2422 CGM.Int64Ty->getPointerTo(), 2423 CGM.Int64Ty->getPointerTo()}; 2424 auto *FnTy = 2425 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2426 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2427 break; 2428 } 2429 case OMPRTL__tgt_target_data_end_nowait: { 2430 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2431 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2432 // *arg_types); 2433 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2434 CGM.Int32Ty, 2435 CGM.VoidPtrPtrTy, 2436 CGM.VoidPtrPtrTy, 2437 CGM.Int64Ty->getPointerTo(), 2438 CGM.Int64Ty->getPointerTo()}; 2439 auto *FnTy = 2440 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2441 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2442 break; 2443 } 2444 case OMPRTL__tgt_target_data_update: { 2445 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2446 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2447 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2448 CGM.Int32Ty, 2449 CGM.VoidPtrPtrTy, 2450 CGM.VoidPtrPtrTy, 2451 CGM.Int64Ty->getPointerTo(), 2452 CGM.Int64Ty->getPointerTo()}; 2453 auto *FnTy = 2454 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2455 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2456 break; 2457 } 2458 case OMPRTL__tgt_target_data_update_nowait: { 2459 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2460 // arg_num, void** args_base, void **args, int64_t *arg_sizes, 
int64_t 2461 // *arg_types); 2462 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2463 CGM.Int32Ty, 2464 CGM.VoidPtrPtrTy, 2465 CGM.VoidPtrPtrTy, 2466 CGM.Int64Ty->getPointerTo(), 2467 CGM.Int64Ty->getPointerTo()}; 2468 auto *FnTy = 2469 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2470 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2471 break; 2472 } 2473 case OMPRTL__tgt_mapper_num_components: { 2474 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2475 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2476 auto *FnTy = 2477 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2478 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2479 break; 2480 } 2481 case OMPRTL__tgt_push_mapper_component: { 2482 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2483 // *base, void *begin, int64_t size, int64_t type); 2484 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2485 CGM.Int64Ty, CGM.Int64Ty}; 2486 auto *FnTy = 2487 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2488 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2489 break; 2490 } 2491 } 2492 assert(RTLFn && "Unable to find OpenMP runtime function"); 2493 return RTLFn; 2494 } 2495 2496 llvm::FunctionCallee 2497 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2498 assert((IVSize == 32 || IVSize == 64) && 2499 "IV size is not compatible with the omp runtime"); 2500 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2501 : "__kmpc_for_static_init_4u") 2502 : (IVSigned ? "__kmpc_for_static_init_8" 2503 : "__kmpc_for_static_init_8u"); 2504 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2505 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2506 llvm::Type *TypeParams[] = { 2507 getIdentTyPointerTy(), // loc 2508 CGM.Int32Ty, // tid 2509 CGM.Int32Ty, // schedtype 2510 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2511 PtrTy, // p_lower 2512 PtrTy, // p_upper 2513 PtrTy, // p_stride 2514 ITy, // incr 2515 ITy // chunk 2516 }; 2517 auto *FnTy = 2518 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2519 return CGM.CreateRuntimeFunction(FnTy, Name); 2520 } 2521 2522 llvm::FunctionCallee 2523 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2524 assert((IVSize == 32 || IVSize == 64) && 2525 "IV size is not compatible with the omp runtime"); 2526 StringRef Name = 2527 IVSize == 32 2528 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2529 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2530 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2531 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2532 CGM.Int32Ty, // tid 2533 CGM.Int32Ty, // schedtype 2534 ITy, // lower 2535 ITy, // upper 2536 ITy, // stride 2537 ITy // chunk 2538 }; 2539 auto *FnTy = 2540 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2541 return CGM.CreateRuntimeFunction(FnTy, Name); 2542 } 2543 2544 llvm::FunctionCallee 2545 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2546 assert((IVSize == 32 || IVSize == 64) && 2547 "IV size is not compatible with the omp runtime"); 2548 StringRef Name = 2549 IVSize == 32 2550 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2551 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2552 llvm::Type *TypeParams[] = { 2553 getIdentTyPointerTy(), // loc 2554 CGM.Int32Ty, // tid 2555 }; 2556 auto *FnTy = 2557 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2558 return CGM.CreateRuntimeFunction(FnTy, Name); 2559 } 2560 2561 llvm::FunctionCallee 2562 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2563 assert((IVSize == 32 || IVSize == 64) && 2564 "IV size is not compatible with the omp runtime"); 2565 StringRef Name = 2566 IVSize == 32 2567 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2568 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2569 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2570 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2571 llvm::Type *TypeParams[] = { 2572 getIdentTyPointerTy(), // loc 2573 CGM.Int32Ty, // tid 2574 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2575 PtrTy, // p_lower 2576 PtrTy, // p_upper 2577 PtrTy // p_stride 2578 }; 2579 auto *FnTy = 2580 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2581 return CGM.CreateRuntimeFunction(FnTy, Name); 2582 } 2583 2584 /// Obtain information that uniquely identifies a target entry. This 2585 /// consists of the file and device IDs as well as line number associated with 2586 /// the relevant entry source location. 
2587 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2588 unsigned &DeviceID, unsigned &FileID, 2589 unsigned &LineNum) { 2590 SourceManager &SM = C.getSourceManager(); 2591 2592 // The loc should be always valid and have a file ID (the user cannot use 2593 // #pragma directives in macros) 2594 2595 assert(Loc.isValid() && "Source location is expected to be always valid."); 2596 2597 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2598 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2599 2600 llvm::sys::fs::UniqueID ID; 2601 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2602 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2603 << PLoc.getFilename() << EC.message(); 2604 2605 DeviceID = ID.getDevice(); 2606 FileID = ID.getFile(); 2607 LineNum = PLoc.getLine(); 2608 } 2609 2610 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2611 if (CGM.getLangOpts().OpenMPSimd) 2612 return Address::invalid(); 2613 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2614 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2615 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2616 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2617 HasRequiresUnifiedSharedMemory))) { 2618 SmallString<64> PtrName; 2619 { 2620 llvm::raw_svector_ostream OS(PtrName); 2621 OS << CGM.getMangledName(GlobalDecl(VD)); 2622 if (!VD->isExternallyVisible()) { 2623 unsigned DeviceID, FileID, Line; 2624 getTargetEntryUniqueInfo(CGM.getContext(), 2625 VD->getCanonicalDecl()->getBeginLoc(), 2626 DeviceID, FileID, Line); 2627 OS << llvm::format("_%x", FileID); 2628 } 2629 OS << "_decl_tgt_ref_ptr"; 2630 } 2631 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2632 if (!Ptr) { 2633 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2634 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2635 PtrName); 2636 2637 auto *GV = cast<llvm::GlobalVariable>(Ptr); 
2638 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2639 2640 if (!CGM.getLangOpts().OpenMPIsDevice) 2641 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2642 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2643 } 2644 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2645 } 2646 return Address::invalid(); 2647 } 2648 2649 llvm::Constant * 2650 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2651 assert(!CGM.getLangOpts().OpenMPUseTLS || 2652 !CGM.getContext().getTargetInfo().isTLSSupported()); 2653 // Lookup the entry, lazily creating it if necessary. 2654 std::string Suffix = getName({"cache", ""}); 2655 return getOrCreateInternalVariable( 2656 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2657 } 2658 2659 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2660 const VarDecl *VD, 2661 Address VDAddr, 2662 SourceLocation Loc) { 2663 if (CGM.getLangOpts().OpenMPUseTLS && 2664 CGM.getContext().getTargetInfo().isTLSSupported()) 2665 return VDAddr; 2666 2667 llvm::Type *VarTy = VDAddr.getElementType(); 2668 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2669 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2670 CGM.Int8PtrTy), 2671 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2672 getOrCreateThreadPrivateCache(VD)}; 2673 return Address(CGF.EmitRuntimeCall( 2674 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2675 VDAddr.getAlignment()); 2676 } 2677 2678 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2679 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2680 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2681 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2682 // library. 
2683 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2684 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2685 OMPLoc); 2686 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2687 // to register constructor/destructor for variable. 2688 llvm::Value *Args[] = { 2689 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2690 Ctor, CopyCtor, Dtor}; 2691 CGF.EmitRuntimeCall( 2692 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2693 } 2694 2695 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2696 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2697 bool PerformInit, CodeGenFunction *CGF) { 2698 if (CGM.getLangOpts().OpenMPUseTLS && 2699 CGM.getContext().getTargetInfo().isTLSSupported()) 2700 return nullptr; 2701 2702 VD = VD->getDefinition(CGM.getContext()); 2703 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2704 QualType ASTTy = VD->getType(); 2705 2706 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2707 const Expr *Init = VD->getAnyInitializer(); 2708 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2709 // Generate function that re-emits the declaration's initializer into the 2710 // threadprivate copy of the variable VD 2711 CodeGenFunction CtorCGF(CGM); 2712 FunctionArgList Args; 2713 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2714 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2715 ImplicitParamDecl::Other); 2716 Args.push_back(&Dst); 2717 2718 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2719 CGM.getContext().VoidPtrTy, Args); 2720 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2721 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2722 llvm::Function *Fn = 2723 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2724 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2725 Args, Loc, Loc); 2726 
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2727 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2728 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2729 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2730 Arg = CtorCGF.Builder.CreateElementBitCast( 2731 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2732 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2733 /*IsInitializer=*/true); 2734 ArgVal = CtorCGF.EmitLoadOfScalar( 2735 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2736 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2737 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2738 CtorCGF.FinishFunction(); 2739 Ctor = Fn; 2740 } 2741 if (VD->getType().isDestructedType() != QualType::DK_none) { 2742 // Generate function that emits destructor call for the threadprivate copy 2743 // of the variable VD 2744 CodeGenFunction DtorCGF(CGM); 2745 FunctionArgList Args; 2746 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2747 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2748 ImplicitParamDecl::Other); 2749 Args.push_back(&Dst); 2750 2751 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2752 CGM.getContext().VoidTy, Args); 2753 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2754 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2755 llvm::Function *Fn = 2756 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2757 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2758 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2759 Loc, Loc); 2760 // Create a scope with an artificial location for the body of this function. 
2761 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2762 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2763 DtorCGF.GetAddrOfLocalVar(&Dst), 2764 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2765 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2766 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2767 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2768 DtorCGF.FinishFunction(); 2769 Dtor = Fn; 2770 } 2771 // Do not emit init function if it is not required. 2772 if (!Ctor && !Dtor) 2773 return nullptr; 2774 2775 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2776 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2777 /*isVarArg=*/false) 2778 ->getPointerTo(); 2779 // Copying constructor for the threadprivate variable. 2780 // Must be NULL - reserved by runtime, but currently it requires that this 2781 // parameter is always NULL. Otherwise it fires assertion. 2782 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2783 if (Ctor == nullptr) { 2784 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2785 /*isVarArg=*/false) 2786 ->getPointerTo(); 2787 Ctor = llvm::Constant::getNullValue(CtorTy); 2788 } 2789 if (Dtor == nullptr) { 2790 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2791 /*isVarArg=*/false) 2792 ->getPointerTo(); 2793 Dtor = llvm::Constant::getNullValue(DtorTy); 2794 } 2795 if (!CGF) { 2796 auto *InitFunctionTy = 2797 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2798 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2799 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2800 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2801 CodeGenFunction InitCGF(CGM); 2802 FunctionArgList ArgList; 2803 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2804 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2805 Loc, Loc); 2806 
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2807 InitCGF.FinishFunction(); 2808 return InitFunction; 2809 } 2810 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2811 } 2812 return nullptr; 2813 } 2814 2815 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2816 llvm::GlobalVariable *Addr, 2817 bool PerformInit) { 2818 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2819 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2820 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2821 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2822 HasRequiresUnifiedSharedMemory)) 2823 return CGM.getLangOpts().OpenMPIsDevice; 2824 VD = VD->getDefinition(CGM.getContext()); 2825 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2826 return CGM.getLangOpts().OpenMPIsDevice; 2827 2828 QualType ASTTy = VD->getType(); 2829 2830 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2831 // Produce the unique prefix to identify the new target regions. We use 2832 // the source location of the variable declaration which we know to not 2833 // conflict with any target region. 
2834 unsigned DeviceID; 2835 unsigned FileID; 2836 unsigned Line; 2837 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2838 SmallString<128> Buffer, Out; 2839 { 2840 llvm::raw_svector_ostream OS(Buffer); 2841 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2842 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2843 } 2844 2845 const Expr *Init = VD->getAnyInitializer(); 2846 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2847 llvm::Constant *Ctor; 2848 llvm::Constant *ID; 2849 if (CGM.getLangOpts().OpenMPIsDevice) { 2850 // Generate function that re-emits the declaration's initializer into 2851 // the threadprivate copy of the variable VD 2852 CodeGenFunction CtorCGF(CGM); 2853 2854 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2855 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2856 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2857 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2858 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2859 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2860 FunctionArgList(), Loc, Loc); 2861 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2862 CtorCGF.EmitAnyExprToMem(Init, 2863 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2864 Init->getType().getQualifiers(), 2865 /*IsInitializer=*/true); 2866 CtorCGF.FinishFunction(); 2867 Ctor = Fn; 2868 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2869 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2870 } else { 2871 Ctor = new llvm::GlobalVariable( 2872 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2873 llvm::GlobalValue::PrivateLinkage, 2874 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2875 ID = Ctor; 2876 } 2877 2878 // Register the information for the entry associated with the constructor. 
2879 Out.clear(); 2880 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2881 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2882 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2883 } 2884 if (VD->getType().isDestructedType() != QualType::DK_none) { 2885 llvm::Constant *Dtor; 2886 llvm::Constant *ID; 2887 if (CGM.getLangOpts().OpenMPIsDevice) { 2888 // Generate function that emits destructor call for the threadprivate 2889 // copy of the variable VD 2890 CodeGenFunction DtorCGF(CGM); 2891 2892 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2893 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2894 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2895 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2896 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2897 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2898 FunctionArgList(), Loc, Loc); 2899 // Create a scope with an artificial location for the body of this 2900 // function. 2901 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2902 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2903 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2904 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2905 DtorCGF.FinishFunction(); 2906 Dtor = Fn; 2907 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2908 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2909 } else { 2910 Dtor = new llvm::GlobalVariable( 2911 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2912 llvm::GlobalValue::PrivateLinkage, 2913 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2914 ID = Dtor; 2915 } 2916 // Register the information for the entry associated with the destructor. 
2917 Out.clear(); 2918 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2919 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2920 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2921 } 2922 return CGM.getLangOpts().OpenMPIsDevice; 2923 } 2924 2925 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2926 QualType VarType, 2927 StringRef Name) { 2928 std::string Suffix = getName({"artificial", ""}); 2929 std::string CacheSuffix = getName({"cache", ""}); 2930 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2931 llvm::Value *GAddr = 2932 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2933 llvm::Value *Args[] = { 2934 emitUpdateLocation(CGF, SourceLocation()), 2935 getThreadID(CGF, SourceLocation()), 2936 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2937 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2938 /*isSigned=*/false), 2939 getOrCreateInternalVariable( 2940 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2941 return Address( 2942 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2943 CGF.EmitRuntimeCall( 2944 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2945 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2946 CGM.getPointerAlign()); 2947 } 2948 2949 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2950 const RegionCodeGenTy &ThenGen, 2951 const RegionCodeGenTy &ElseGen) { 2952 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2953 2954 // If the condition constant folds and can be elided, try to avoid emitting 2955 // the condition and the dead arm of the if/else. 2956 bool CondConstant; 2957 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2958 if (CondConstant) 2959 ThenGen(CGF); 2960 else 2961 ElseGen(CGF); 2962 return; 2963 } 2964 2965 // Otherwise, the condition did not fold, or we couldn't elide it. 
Just 2966 // emit the conditional branch. 2967 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2968 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2969 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2970 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2971 2972 // Emit the 'then' code. 2973 CGF.EmitBlock(ThenBlock); 2974 ThenGen(CGF); 2975 CGF.EmitBranch(ContBlock); 2976 // Emit the 'else' code if present. 2977 // There is no need to emit line number for unconditional branch. 2978 (void)ApplyDebugLocation::CreateEmpty(CGF); 2979 CGF.EmitBlock(ElseBlock); 2980 ElseGen(CGF); 2981 // There is no need to emit line number for unconditional branch. 2982 (void)ApplyDebugLocation::CreateEmpty(CGF); 2983 CGF.EmitBranch(ContBlock); 2984 // Emit the continuation block for code after the if. 2985 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2986 } 2987 2988 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2989 llvm::Function *OutlinedFn, 2990 ArrayRef<llvm::Value *> CapturedVars, 2991 const Expr *IfCond) { 2992 if (!CGF.HaveInsertPoint()) 2993 return; 2994 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2995 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 2996 PrePostActionTy &) { 2997 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2998 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2999 llvm::Value *Args[] = { 3000 RTLoc, 3001 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3002 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3003 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3004 RealArgs.append(std::begin(Args), std::end(Args)); 3005 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3006 3007 llvm::FunctionCallee RTLFn = 3008 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3009 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3010 }; 3011 auto &&ElseGen = [OutlinedFn, 
CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3012 PrePostActionTy &) { 3013 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3014 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3015 // Build calls: 3016 // __kmpc_serialized_parallel(&Loc, GTid); 3017 llvm::Value *Args[] = {RTLoc, ThreadID}; 3018 CGF.EmitRuntimeCall( 3019 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3020 3021 // OutlinedFn(>id, &zero, CapturedStruct); 3022 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3023 /*Name*/ ".zero.addr"); 3024 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 3025 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3026 // ThreadId for serialized parallels is 0. 3027 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 3028 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 3029 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3030 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3031 3032 // __kmpc_end_serialized_parallel(&Loc, GTid); 3033 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3034 CGF.EmitRuntimeCall( 3035 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3036 EndArgs); 3037 }; 3038 if (IfCond) { 3039 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 3040 } else { 3041 RegionCodeGenTy ThenRCG(ThenGen); 3042 ThenRCG(CGF); 3043 } 3044 } 3045 3046 // If we're inside an (outlined) parallel region, use the region info's 3047 // thread-ID variable (it is passed in a first argument of the outlined function 3048 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3049 // regular serial code region, get thread ID by calling kmp_int32 3050 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3051 // return the address of that temp. 
3052 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3053 SourceLocation Loc) { 3054 if (auto *OMPRegionInfo = 3055 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3056 if (OMPRegionInfo->getThreadIDVariable()) 3057 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 3058 3059 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3060 QualType Int32Ty = 3061 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3062 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3063 CGF.EmitStoreOfScalar(ThreadID, 3064 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3065 3066 return ThreadIDTemp; 3067 } 3068 3069 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3070 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3071 SmallString<256> Buffer; 3072 llvm::raw_svector_ostream Out(Buffer); 3073 Out << Name; 3074 StringRef RuntimeName = Out.str(); 3075 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3076 if (Elem.second) { 3077 assert(Elem.second->getType()->getPointerElementType() == Ty && 3078 "OMP internal variable has different type than requested"); 3079 return &*Elem.second; 3080 } 3081 3082 return Elem.second = new llvm::GlobalVariable( 3083 CGM.getModule(), Ty, /*IsConstant*/ false, 3084 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3085 Elem.first(), /*InsertBefore=*/nullptr, 3086 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3087 } 3088 3089 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3090 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3091 std::string Name = getName({Prefix, "var"}); 3092 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3093 } 3094 3095 namespace { 3096 /// Common pre(post)-action for different OpenMP constructs. 
///
/// Enter() emits a call to EnterCallee with EnterArgs and Exit() emits a call
/// to ExitCallee with ExitArgs. When Conditional is set, Enter() also branches
/// on the result of the enter call, so that code emitted afterwards sits in a
/// block that is only reached when the call returned a non-null value; Done()
/// then has to be called to emit the join block.
///
/// Both argument lists are held as ArrayRefs (no copy is made), so the storage
/// they refer to must stay alive for as long as the action is used.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  // Join block of the conditional form; only assigned by Enter() when
  // Conditional is true.
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  /// Emit the enter call and, for conditional actions, open the "then" block
  /// guarded by its result.
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Close the conditional opened by Enter(). ContBlock is used
  /// unconditionally here, so this is only meaningful for actions constructed
  /// with Conditional == true (every caller visible in this part of the file
  /// follows that rule).
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  /// Emit the exit call.
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region: \p CriticalOpGen bracketed by
/// __kmpc_critical (or __kmpc_critical_with_hint when \p Hint is non-null) and
/// __kmpc_end_critical. Does nothing without an insert point.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes the same three arguments plus the optional hint; the
  // exit call reuses Args as-is.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint value is cast (as unsigned) to the pointer-sized integer type.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: \p MasterOpGen is guarded by the result of
/// __kmpc_master and followed by __kmpc_end_master inside the guarded block.
/// Does nothing without an insert point.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Conditional action: emit the join block after the region body.
  Action.Done(CGF);
}

/// Emit a call to __kmpc_omp_taskyield and, when inside an OpenMP region,
/// let that region emit its untied-task switch point. Does nothing without an
/// insert point.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region: \p TaskgroupOpGen bracketed by __kmpc_taskgroup
/// and __kmpc_end_taskgroup. Does nothing without an insert point.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
///
/// Loads element \p Index of \p Array and returns it as an address with the
/// declared alignment of \p Var, cast to the memory type of \p Var.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Build the internal-linkage helper `void copy_func(void *LHSArg, void
/// *RHSArg)` used for copyprivate.
///
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers.
/// For every index I of \p AssignmentOps the function emits an
/// EmitOMPCopy from the variable addressed by RHSArg[I] to the one addressed by
/// LHSArg[I], using the declarations named by \p SrcExprs[I] / \p DestExprs[I]
/// and the type of \p CopyprivateVars[I].
/// \return The generated function.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The helper body is emitted with its own CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region, optionally followed by the copyprivate broadcast.
///
/// \p SingleOpGen is guarded by the result of __kmpc_single and followed by
/// __kmpc_end_single. When \p CopyprivateVars is non-empty, a 32-bit "did_it"
/// flag records whether this thread executed the region, and a call to
/// __kmpc_copyprivate is emitted after the region with the list of variable
/// addresses and a generated copy function. The four expression lists must all
/// have the same length (asserted). Does nothing without an insert point.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  // DidIt stays invalid when there is nothing to copy; its validity is what
  // selects the copyprivate paths below.
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // Emitted before Action.Done(), i.e. still inside the guarded block.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed in the helper's DestExprs position and
    // DstExprs in its SrcExprs position. This looks like a parameter-naming
    // mismatch between the two signatures rather than a behavioural bug, but
    // confirm against the callers of emitSingleRegion before renaming anything.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                      CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// Emit an 'ordered' region. With \p IsThreads the body is bracketed by
/// __kmpc_ordered / __kmpc_end_ordered; otherwise the body is emitted inline
/// without any runtime calls. Does nothing without an insert point.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Select the ident flags describing the barrier that belongs to directive
/// \p Kind: dedicated values for 'for', 'sections', 'single' and an explicit
/// 'barrier', and the generic implicit-barrier flag for everything else.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Override the default schedule for doacross loops.
///
/// If any 'ordered' clause of \p S reports a non-zero getNumForLoops(),
/// \p ScheduleKind is set to static and \p ChunkExpr to an unsigned 32-bit
/// literal 1. Otherwise both outputs are left untouched.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier for directive \p Kind.
///
/// Normally this is a plain call to __kmpc_barrier. Inside an OpenMP region
/// that has cancellation, and unless \p ForceSimpleCall is set,
/// __kmpc_cancel_barrier is called instead; with \p EmitChecks a non-null
/// result of that call branches (through cleanups) to the cancel destination
/// of the region's directive. Does nothing without an insert point.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      // The cancel barrier replaces the plain barrier below.
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  // Only the static schedule distinguishes chunked from non-chunked; dynamic
  // and guided always map to their *_chunked enumerators. Ordered selects the
  // OMP_ord_* family instead of OMP_sch_*.
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule given: treated as non-chunked static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
///
/// \p ScheduleKind is not inspected; the result depends on \p Chunked only.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// True if the (unordered) loop schedule maps to OMP_sch_static.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// True if the distribute schedule maps to OMP_dist_sch_static.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// True if the (unordered) loop schedule maps to OMP_sch_static_chunked.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// True if the distribute schedule maps to OMP_dist_sch_static_chunked.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// True if \p ScheduleKind maps to anything other than OMP_sch_static when
/// evaluated as unordered and non-chunked.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Combine \p Schedule with the schedule-clause modifiers \p M1 and \p M2 into
/// the integer passed to the runtime.
///
/// monotonic / nonmonotonic select the corresponding modifier bit; \p M2 is
/// processed after \p M1, so its choice wins if both set one. The simd
/// modifier turns OMP_sch_static_chunked into OMP_sch_static_balanced_chunked.
/// For OpenMP >= 5.0, when no modifier was selected and the schedule is not
/// one of the static kinds, nonmonotonic is applied by default.
/// \return Schedule | Modifier.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // Same handling for the second modifier.
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

/// Emit the call that initializes a dispatch-scheduled loop.
///
/// The schedule is derived from \p ScheduleKind, the presence of a chunk in
/// \p DispatchValues and \p Ordered. The stride argument is always the
/// constant 1, and a missing chunk defaults to 1 as well (both of width
/// \p IVSize). Unless \p Ordered, static schedules are rejected by assertion.
/// Does nothing without an insert point.
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared implementation of the static-init call for loop and distribute
/// schedules.
///
/// Emits a call to \p ForStaticInitFunction with the given location and thread
/// id, the schedule combined with modifiers \p M1 / \p M2, the addresses held
/// in \p Values (IL, LB, UB, ST), an increment of 1 and the chunk. A missing
/// chunk defaults to 1 and is only accepted for non-chunked static schedules;
/// a present chunk requires a chunked static schedule (both asserted). Does
/// nothing without an insert point.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init call for a worksharing directive \p DKind.
///
/// The location is tagged as a loop or a sections work item depending on
/// whether \p DKind is a loop directive; the schedule modifiers come from
/// \p ScheduleKind.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init call for a 'distribute' schedule. The location is
/// tagged as distribute work and no schedule modifiers are applied.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the call to __kmpc_for_static_fini for directive \p DKind. The
/// location flag is chosen in this order: distribute, loop, otherwise
/// sections. Does nothing without an insert point.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// Emit the per-iteration "fini" call for an ordered dispatch loop, using the
/// callee returned by createDispatchFiniFunction(). Does nothing without an
/// insert point.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  // NOTE(review): the name above does not match the helper used below
  // (createDispatchFiniFunction) - presumably the real entry point is a
  // __kmpc_dispatch_fini_* variant; confirm against that helper.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit the call that fetches the next chunk of a dispatch-scheduled loop.
///
/// \p IL, \p LB, \p UB and \p ST are passed by address to the callee returned
/// by createDispatchNextFunction().
/// \return The call result converted from a signed 32-bit integer to bool.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit the call to __kmpc_push_num_threads with \p NumThreads cast (as
/// signed) to a 32-bit integer. Does nothing without an insert point.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// Emit the call to __kmpc_push_proc_bind for a master, close or spread
/// binding. OMPC_PROC_BIND_unknown is not expected here (llvm_unreachable).
/// Does nothing without an insert point.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  // Only Master, Close and Spread are produced by the switch below.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// Emit the call to __kmpc_flush. The list of flushed expressions (the unnamed
/// second parameter) is not used. Does nothing without an insert point.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// True when neither target-region entries nor device global-variable entries
/// have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
///
/// Stores a placeholder entry (order \p Order, null address and ID) under the
/// given device / file / parent / line key and bumps the entry counter. Only
/// valid during device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Record the address, ID and flags of a target region.
///
/// In device mode the entry must already exist and still be unregistered
/// (see hasTargetRegionEntryInfo()); otherwise an error diagnostic is reported
/// and nothing is recorded. In host mode a new entry is stored with the next
/// order number and the entry counter is incremented.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// True if an entry exists for the given device / file / parent / line key and
/// has neither an address nor an ID yet. An entry that is already registered
/// therefore yields false, just like a missing one.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

/// Invoke \p Action with (device id, file id, parent name, line, entry) for
/// every recorded target-region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Initialize a device global-variable entry named \p Name with the given
/// order and flags. The entry counter is incremented unconditionally. Only
/// valid during device code generation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Record address, size and linkage of the device global variable
/// \p VarName.
///
/// Device mode: the entry is expected to exist already with matching flags
/// (asserted). If it already has an address and
/// hasDeviceGlobalVarEntryInfo() confirms it, only a still-zero size (and the
/// linkage with it) is filled in; otherwise size, linkage and address are set.
/// Host mode: an existing entry gets the same "fill in size if zero" update; a
/// new one is created with the next order number and the entry counter is
/// incremented.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    // NOTE(review): the message reads like a description of the failure
    // ("resetting with a new address"), while the condition checks that this
    // is NOT happening.
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action with (name, entry) for every recorded device
/// global-variable entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
3971 llvm::Type *OffloadEntryTy = 3972 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3973 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); 3974 auto *HostEntriesBegin = new llvm::GlobalVariable( 3975 M, OffloadEntryTy, /*isConstant=*/true, 3976 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3977 EntriesBeginName); 3978 std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); 3979 auto *HostEntriesEnd = 3980 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, 3981 llvm::GlobalValue::ExternalLinkage, 3982 /*Initializer=*/nullptr, EntriesEndName); 3983 3984 // Create all device images 3985 auto *DeviceImageTy = cast<llvm::StructType>( 3986 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3987 ConstantInitBuilder DeviceImagesBuilder(CGM); 3988 ConstantArrayBuilder DeviceImagesEntries = 3989 DeviceImagesBuilder.beginArray(DeviceImageTy); 3990 3991 for (const llvm::Triple &Device : Devices) { 3992 StringRef T = Device.getTriple(); 3993 std::string BeginName = getName({"omp_offloading", "img_start", ""}); 3994 auto *ImgBegin = new llvm::GlobalVariable( 3995 M, CGM.Int8Ty, /*isConstant=*/true, 3996 llvm::GlobalValue::ExternalWeakLinkage, 3997 /*Initializer=*/nullptr, Twine(BeginName).concat(T)); 3998 std::string EndName = getName({"omp_offloading", "img_end", ""}); 3999 auto *ImgEnd = new llvm::GlobalVariable( 4000 M, CGM.Int8Ty, /*isConstant=*/true, 4001 llvm::GlobalValue::ExternalWeakLinkage, 4002 /*Initializer=*/nullptr, Twine(EndName).concat(T)); 4003 4004 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 4005 HostEntriesEnd}; 4006 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 4007 DeviceImagesEntries); 4008 } 4009 4010 // Create device images global array. 
4011 std::string ImagesName = getName({"omp_offloading", "device_images"}); 4012 llvm::GlobalVariable *DeviceImages = 4013 DeviceImagesEntries.finishAndCreateGlobal(ImagesName, 4014 CGM.getPointerAlign(), 4015 /*isConstant=*/true); 4016 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4017 4018 // This is a Zero array to be used in the creation of the constant expressions 4019 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 4020 llvm::Constant::getNullValue(CGM.Int32Ty)}; 4021 4022 // Create the target region descriptor. 4023 llvm::Constant *Data[] = { 4024 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 4025 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 4026 DeviceImages, Index), 4027 HostEntriesBegin, HostEntriesEnd}; 4028 std::string Descriptor = getName({"omp_offloading", "descriptor"}); 4029 llvm::GlobalVariable *Desc = createGlobalStruct( 4030 CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); 4031 4032 // Emit code to register or unregister the descriptor at execution 4033 // startup or closing, respectively. 4034 4035 llvm::Function *UnRegFn; 4036 { 4037 FunctionArgList Args; 4038 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 4039 Args.push_back(&DummyPtr); 4040 4041 CodeGenFunction CGF(CGM); 4042 // Disable debug info for global (de-)initializer because they are not part 4043 // of some particular construct. 
4044 CGF.disableDebugInfo(); 4045 const auto &FI = 4046 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4047 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 4048 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); 4049 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); 4050 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 4051 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 4052 Desc); 4053 CGF.FinishFunction(); 4054 } 4055 llvm::Function *RegFn; 4056 { 4057 CodeGenFunction CGF(CGM); 4058 // Disable debug info for global (de-)initializer because they are not part 4059 // of some particular construct. 4060 CGF.disableDebugInfo(); 4061 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 4062 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 4063 4064 // Encode offload target triples into the registration function name. It 4065 // will serve as a comdat key for the registration/unregistration code for 4066 // this particular combination of offloading targets. 4067 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); 4068 RegFnNameParts[0] = "omp_offloading"; 4069 RegFnNameParts[1] = "descriptor_reg"; 4070 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), 4071 [](const llvm::Triple &T) -> const std::string& { 4072 return T.getTriple(); 4073 }); 4074 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); 4075 std::string Descriptor = getName(RegFnNameParts); 4076 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); 4077 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 4078 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 4079 // Create a variable to drive the registration and unregistration of the 4080 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
4081 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 4082 SourceLocation(), nullptr, C.CharTy, 4083 ImplicitParamDecl::Other); 4084 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 4085 CGF.FinishFunction(); 4086 } 4087 if (CGM.supportsCOMDAT()) { 4088 // It is sufficient to call registration function only once, so create a 4089 // COMDAT group for registration/unregistration functions and associated 4090 // data. That would reduce startup time and code size. Registration 4091 // function serves as a COMDAT group key. 4092 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 4093 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 4094 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 4095 RegFn->setComdat(ComdatKey); 4096 UnRegFn->setComdat(ComdatKey); 4097 DeviceImages->setComdat(ComdatKey); 4098 Desc->setComdat(ComdatKey); 4099 } 4100 return RegFn; 4101 } 4102 4103 void CGOpenMPRuntime::createOffloadEntry( 4104 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4105 llvm::GlobalValue::LinkageTypes Linkage) { 4106 StringRef Name = Addr->getName(); 4107 llvm::Module &M = CGM.getModule(); 4108 llvm::LLVMContext &C = M.getContext(); 4109 4110 // Create constant string with the name. 
4111 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4112 4113 std::string StringName = getName({"omp_offloading", "entry_name"}); 4114 auto *Str = new llvm::GlobalVariable( 4115 M, StrPtrInit->getType(), /*isConstant=*/true, 4116 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4117 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4118 4119 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4120 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4121 llvm::ConstantInt::get(CGM.SizeTy, Size), 4122 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4123 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4124 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4125 llvm::GlobalVariable *Entry = createGlobalStruct( 4126 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4127 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4128 4129 // The entry has to be created in the section the linker expects it to be. 4130 std::string Section = getName({"omp_offloading", "entries"}); 4131 Entry->setSection(Section); 4132 } 4133 4134 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4135 // Emit the offloading entries and metadata so that the device codegen side 4136 // can easily figure out what to emit. The produced metadata looks like 4137 // this: 4138 // 4139 // !omp_offload.info = !{!1, ...} 4140 // 4141 // Right now we only generate metadata for function that contain target 4142 // regions. 4143 4144 // If we do not have entries, we don't need to do anything. 
4145 if (OffloadEntriesInfoManager.empty()) 4146 return; 4147 4148 llvm::Module &M = CGM.getModule(); 4149 llvm::LLVMContext &C = M.getContext(); 4150 SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 4151 OrderedEntries(OffloadEntriesInfoManager.size()); 4152 llvm::SmallVector<StringRef, 16> ParentFunctions( 4153 OffloadEntriesInfoManager.size()); 4154 4155 // Auxiliary methods to create metadata values and strings. 4156 auto &&GetMDInt = [this](unsigned V) { 4157 return llvm::ConstantAsMetadata::get( 4158 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4159 }; 4160 4161 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4162 4163 // Create the offloading info metadata node. 4164 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4165 4166 // Create function that emits metadata for each target region entry; 4167 auto &&TargetRegionMetadataEmitter = 4168 [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( 4169 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4170 unsigned Line, 4171 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4172 // Generate metadata for target regions. Each entry of this metadata 4173 // contains: 4174 // - Entry 0 -> Kind of this type of metadata (0). 4175 // - Entry 1 -> Device ID of the file where the entry was identified. 4176 // - Entry 2 -> File ID of the file where the entry was identified. 4177 // - Entry 3 -> Mangled name of the function where the entry was 4178 // identified. 4179 // - Entry 4 -> Line in the file where the entry was identified. 4180 // - Entry 5 -> Order the entry was created. 4181 // The first element of the metadata node is the kind. 4182 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4183 GetMDInt(FileID), GetMDString(ParentName), 4184 GetMDInt(Line), GetMDInt(E.getOrder())}; 4185 4186 // Save this entry in the right position of the ordered entries array. 
4187 OrderedEntries[E.getOrder()] = &E; 4188 ParentFunctions[E.getOrder()] = ParentName; 4189 4190 // Add metadata to the named metadata node. 4191 MD->addOperand(llvm::MDNode::get(C, Ops)); 4192 }; 4193 4194 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4195 TargetRegionMetadataEmitter); 4196 4197 // Create function that emits metadata for each device global variable entry; 4198 auto &&DeviceGlobalVarMetadataEmitter = 4199 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4200 MD](StringRef MangledName, 4201 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4202 &E) { 4203 // Generate metadata for global variables. Each entry of this metadata 4204 // contains: 4205 // - Entry 0 -> Kind of this type of metadata (1). 4206 // - Entry 1 -> Mangled name of the variable. 4207 // - Entry 2 -> Declare target kind. 4208 // - Entry 3 -> Order the entry was created. 4209 // The first element of the metadata node is the kind. 4210 llvm::Metadata *Ops[] = { 4211 GetMDInt(E.getKind()), GetMDString(MangledName), 4212 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4213 4214 // Save this entry in the right position of the ordered entries array. 4215 OrderedEntries[E.getOrder()] = &E; 4216 4217 // Add metadata to the named metadata node. 4218 MD->addOperand(llvm::MDNode::get(C, Ops)); 4219 }; 4220 4221 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4222 DeviceGlobalVarMetadataEmitter); 4223 4224 for (const auto *E : OrderedEntries) { 4225 assert(E && "All ordered entries must exist!"); 4226 if (const auto *CE = 4227 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4228 E)) { 4229 if (!CE->getID() || !CE->getAddress()) { 4230 // Do not blame the entry if the parent funtion is not emitted. 
4231 StringRef FnName = ParentFunctions[CE->getOrder()]; 4232 if (!CGM.GetGlobalValue(FnName)) 4233 continue; 4234 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4235 DiagnosticsEngine::Error, 4236 "Offloading entry for target region is incorrect: either the " 4237 "address or the ID is invalid."); 4238 CGM.getDiags().Report(DiagID); 4239 continue; 4240 } 4241 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4242 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4243 } else if (const auto *CE = 4244 dyn_cast<OffloadEntriesInfoManagerTy:: 4245 OffloadEntryInfoDeviceGlobalVar>(E)) { 4246 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4247 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4248 CE->getFlags()); 4249 switch (Flags) { 4250 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4251 if (CGM.getLangOpts().OpenMPIsDevice && 4252 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4253 continue; 4254 if (!CE->getAddress()) { 4255 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4256 DiagnosticsEngine::Error, 4257 "Offloading entry for declare target variable is incorrect: the " 4258 "address is invalid."); 4259 CGM.getDiags().Report(DiagID); 4260 continue; 4261 } 4262 // The vaiable has no definition - no need to add the entry. 
4263 if (CE->getVarSize().isZero()) 4264 continue; 4265 break; 4266 } 4267 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4268 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4269 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4270 "Declaret target link address is set."); 4271 if (CGM.getLangOpts().OpenMPIsDevice) 4272 continue; 4273 if (!CE->getAddress()) { 4274 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4275 DiagnosticsEngine::Error, 4276 "Offloading entry for declare target variable is incorrect: the " 4277 "address is invalid."); 4278 CGM.getDiags().Report(DiagID); 4279 continue; 4280 } 4281 break; 4282 } 4283 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4284 CE->getVarSize().getQuantity(), Flags, 4285 CE->getLinkage()); 4286 } else { 4287 llvm_unreachable("Unsupported entry kind."); 4288 } 4289 } 4290 } 4291 4292 /// Loads all the offload entries information from the host IR 4293 /// metadata. 4294 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4295 // If we are in target mode, load the metadata from the host IR. This code has 4296 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4297 4298 if (!CGM.getLangOpts().OpenMPIsDevice) 4299 return; 4300 4301 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4302 return; 4303 4304 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4305 if (auto EC = Buf.getError()) { 4306 CGM.getDiags().Report(diag::err_cannot_open_file) 4307 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4308 return; 4309 } 4310 4311 llvm::LLVMContext C; 4312 auto ME = expectedToErrorOrAndEmitErrors( 4313 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4314 4315 if (auto EC = ME.getError()) { 4316 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4317 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4318 CGM.getDiags().Report(DiagID) 4319 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4320 return; 4321 } 4322 4323 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4324 if (!MD) 4325 return; 4326 4327 for (llvm::MDNode *MN : MD->operands()) { 4328 auto &&GetMDInt = [MN](unsigned Idx) { 4329 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4330 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4331 }; 4332 4333 auto &&GetMDString = [MN](unsigned Idx) { 4334 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4335 return V->getString(); 4336 }; 4337 4338 switch (GetMDInt(0)) { 4339 default: 4340 llvm_unreachable("Unexpected metadata!"); 4341 break; 4342 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4343 OffloadingEntryInfoTargetRegion: 4344 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4345 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4346 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4347 /*Order=*/GetMDInt(5)); 4348 break; 4349 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4350 OffloadingEntryInfoDeviceGlobalVar: 4351 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4352 /*MangledName=*/GetMDString(1), 4353 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4354 /*Flags=*/GetMDInt(2)), 4355 /*Order=*/GetMDInt(3)); 4356 break; 4357 } 4358 } 4359 } 4360 4361 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4362 if (!KmpRoutineEntryPtrTy) { 4363 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4364 ASTContext &C = CGM.getContext(); 4365 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4366 FunctionProtoType::ExtProtoInfo EPI; 4367 KmpRoutineEntryPtrQTy = C.getPointerType( 4368 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4369 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4370 } 4371 } 4372 4373 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4374 // Make sure the type of the entry is already created. This is the type we 4375 // have to create: 4376 // struct __tgt_offload_entry{ 4377 // void *addr; // Pointer to the offload entry info. 4378 // // (function or global) 4379 // char *name; // Name of the function or global. 4380 // size_t size; // Size of the entry info (0 if it a function). 4381 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4382 // int32_t reserved; // Reserved, to use by the runtime library. 
4383 // }; 4384 if (TgtOffloadEntryQTy.isNull()) { 4385 ASTContext &C = CGM.getContext(); 4386 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4387 RD->startDefinition(); 4388 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4389 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4390 addFieldToRecordDecl(C, RD, C.getSizeType()); 4391 addFieldToRecordDecl( 4392 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4393 addFieldToRecordDecl( 4394 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4395 RD->completeDefinition(); 4396 RD->addAttr(PackedAttr::CreateImplicit(C)); 4397 TgtOffloadEntryQTy = C.getRecordType(RD); 4398 } 4399 return TgtOffloadEntryQTy; 4400 } 4401 4402 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4403 // These are the types we need to build: 4404 // struct __tgt_device_image{ 4405 // void *ImageStart; // Pointer to the target code start. 4406 // void *ImageEnd; // Pointer to the target code end. 4407 // // We also add the host entries to the device image, as it may be useful 4408 // // for the target runtime to have access to that information. 4409 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4410 // // the entries. 4411 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4412 // // entries (non inclusive). 
4413 // }; 4414 if (TgtDeviceImageQTy.isNull()) { 4415 ASTContext &C = CGM.getContext(); 4416 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4417 RD->startDefinition(); 4418 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4419 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4420 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4421 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4422 RD->completeDefinition(); 4423 TgtDeviceImageQTy = C.getRecordType(RD); 4424 } 4425 return TgtDeviceImageQTy; 4426 } 4427 4428 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4429 // struct __tgt_bin_desc{ 4430 // int32_t NumDevices; // Number of devices supported. 4431 // __tgt_device_image *DeviceImages; // Arrays of device images 4432 // // (one per device). 4433 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4434 // // entries. 4435 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4436 // // entries (non inclusive). 
4437 // }; 4438 if (TgtBinaryDescriptorQTy.isNull()) { 4439 ASTContext &C = CGM.getContext(); 4440 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4441 RD->startDefinition(); 4442 addFieldToRecordDecl( 4443 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4444 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4445 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4446 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4447 RD->completeDefinition(); 4448 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4449 } 4450 return TgtBinaryDescriptorQTy; 4451 } 4452 4453 namespace { 4454 struct PrivateHelpersTy { 4455 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4456 const VarDecl *PrivateElemInit) 4457 : Original(Original), PrivateCopy(PrivateCopy), 4458 PrivateElemInit(PrivateElemInit) {} 4459 const VarDecl *Original; 4460 const VarDecl *PrivateCopy; 4461 const VarDecl *PrivateElemInit; 4462 }; 4463 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4464 } // anonymous namespace 4465 4466 static RecordDecl * 4467 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4468 if (!Privates.empty()) { 4469 ASTContext &C = CGM.getContext(); 4470 // Build struct .kmp_privates_t. 
{ 4471 // /* private vars */ 4472 // }; 4473 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4474 RD->startDefinition(); 4475 for (const auto &Pair : Privates) { 4476 const VarDecl *VD = Pair.second.Original; 4477 QualType Type = VD->getType().getNonReferenceType(); 4478 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4479 if (VD->hasAttrs()) { 4480 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4481 E(VD->getAttrs().end()); 4482 I != E; ++I) 4483 FD->addAttr(*I); 4484 } 4485 } 4486 RD->completeDefinition(); 4487 return RD; 4488 } 4489 return nullptr; 4490 } 4491 4492 static RecordDecl * 4493 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4494 QualType KmpInt32Ty, 4495 QualType KmpRoutineEntryPointerQTy) { 4496 ASTContext &C = CGM.getContext(); 4497 // Build struct kmp_task_t { 4498 // void * shareds; 4499 // kmp_routine_entry_t routine; 4500 // kmp_int32 part_id; 4501 // kmp_cmplrdata_t data1; 4502 // kmp_cmplrdata_t data2; 4503 // For taskloops additional fields: 4504 // kmp_uint64 lb; 4505 // kmp_uint64 ub; 4506 // kmp_int64 st; 4507 // kmp_int32 liter; 4508 // void * reductions; 4509 // }; 4510 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4511 UD->startDefinition(); 4512 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4513 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4514 UD->completeDefinition(); 4515 QualType KmpCmplrdataTy = C.getRecordType(UD); 4516 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4517 RD->startDefinition(); 4518 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4519 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4520 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4521 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4522 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4523 if (isOpenMPTaskLoopDirective(Kind)) { 4524 QualType KmpUInt64Ty = 4525 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4526 QualType KmpInt64Ty = 4527 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4528 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4529 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4530 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4531 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4532 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4533 } 4534 RD->completeDefinition(); 4535 return RD; 4536 } 4537 4538 static RecordDecl * 4539 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4540 ArrayRef<PrivateDataTy> Privates) { 4541 ASTContext &C = CGM.getContext(); 4542 // Build struct kmp_task_t_with_privates { 4543 // kmp_task_t task_data; 4544 // .kmp_privates_t. privates; 4545 // }; 4546 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4547 RD->startDefinition(); 4548 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4549 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4550 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4551 RD->completeDefinition(); 4552 return RD; 4553 } 4554 4555 /// Emit a proxy function which accepts kmp_task_t as the second 4556 /// argument. 
4557 /// \code 4558 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4559 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4560 /// For taskloops: 4561 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4562 /// tt->reductions, tt->shareds); 4563 /// return 0; 4564 /// } 4565 /// \endcode 4566 static llvm::Function * 4567 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4568 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4569 QualType KmpTaskTWithPrivatesPtrQTy, 4570 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4571 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4572 llvm::Value *TaskPrivatesMap) { 4573 ASTContext &C = CGM.getContext(); 4574 FunctionArgList Args; 4575 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4576 ImplicitParamDecl::Other); 4577 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4578 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4579 ImplicitParamDecl::Other); 4580 Args.push_back(&GtidArg); 4581 Args.push_back(&TaskTypeArg); 4582 const auto &TaskEntryFnInfo = 4583 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4584 llvm::FunctionType *TaskEntryTy = 4585 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4586 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4587 auto *TaskEntry = llvm::Function::Create( 4588 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4589 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4590 TaskEntry->setDoesNotRecurse(); 4591 CodeGenFunction CGF(CGM); 4592 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4593 Loc, Loc); 4594 4595 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4596 // tt, 4597 // For taskloops: 4598 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4599 // 
tt->task_data.shareds); 4600 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4601 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4602 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4603 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4604 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4605 const auto *KmpTaskTWithPrivatesQTyRD = 4606 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4607 LValue Base = 4608 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4609 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4610 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4611 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4612 llvm::Value *PartidParam = PartIdLVal.getPointer(); 4613 4614 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4615 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4616 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4617 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4618 CGF.ConvertTypeForMem(SharedsPtrTy)); 4619 4620 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4621 llvm::Value *PrivatesParam; 4622 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4623 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4624 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4625 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 4626 } else { 4627 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4628 } 4629 4630 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4631 TaskPrivatesMap, 4632 CGF.Builder 4633 .CreatePointerBitCastOrAddrSpaceCast( 4634 TDBase.getAddress(), CGF.VoidPtrTy) 4635 .getPointer()}; 4636 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4637 std::end(CommonArgs)); 4638 if (isOpenMPTaskLoopDirective(Kind)) { 4639 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), 
KmpTaskTLowerBound); 4640 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4641 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4642 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4643 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4644 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4645 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4646 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4647 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4648 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4649 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4650 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4651 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4652 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4653 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4654 CallArgs.push_back(LBParam); 4655 CallArgs.push_back(UBParam); 4656 CallArgs.push_back(StParam); 4657 CallArgs.push_back(LIParam); 4658 CallArgs.push_back(RParam); 4659 } 4660 CallArgs.push_back(SharedsParam); 4661 4662 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4663 CallArgs); 4664 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4665 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4666 CGF.FinishFunction(); 4667 return TaskEntry; 4668 } 4669 4670 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4671 SourceLocation Loc, 4672 QualType KmpInt32Ty, 4673 QualType KmpTaskTWithPrivatesPtrQTy, 4674 QualType KmpTaskTWithPrivatesQTy) { 4675 ASTContext &C = CGM.getContext(); 4676 FunctionArgList Args; 4677 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4678 ImplicitParamDecl::Other); 4679 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4680 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4681 ImplicitParamDecl::Other); 4682 
Args.push_back(&GtidArg); 4683 Args.push_back(&TaskTypeArg); 4684 const auto &DestructorFnInfo = 4685 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4686 llvm::FunctionType *DestructorFnTy = 4687 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4688 std::string Name = 4689 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4690 auto *DestructorFn = 4691 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4692 Name, &CGM.getModule()); 4693 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4694 DestructorFnInfo); 4695 DestructorFn->setDoesNotRecurse(); 4696 CodeGenFunction CGF(CGM); 4697 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4698 Args, Loc, Loc); 4699 4700 LValue Base = CGF.EmitLoadOfPointerLValue( 4701 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4702 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4703 const auto *KmpTaskTWithPrivatesQTyRD = 4704 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4705 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4706 Base = CGF.EmitLValueForField(Base, *FI); 4707 for (const auto *Field : 4708 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4709 if (QualType::DestructionKind DtorKind = 4710 Field->getType().isDestructedType()) { 4711 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4712 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4713 } 4714 } 4715 CGF.FinishFunction(); 4716 return DestructorFn; 4717 } 4718 4719 /// Emit a privates mapping function for correct handling of private and 4720 /// firstprivate variables. 4721 /// \code 4722 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4723 /// **noalias priv1,..., <tyn> **noalias privn) { 4724 /// *priv1 = &.privates.priv1; 4725 /// ...; 4726 /// *privn = &.privates.privn; 4727 /// } 4728 /// \endcode 4729 static llvm::Value * 4730 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4731 ArrayRef<const Expr *> PrivateVars, 4732 ArrayRef<const Expr *> FirstprivateVars, 4733 ArrayRef<const Expr *> LastprivateVars, 4734 QualType PrivatesQTy, 4735 ArrayRef<PrivateDataTy> Privates) { 4736 ASTContext &C = CGM.getContext(); 4737 FunctionArgList Args; 4738 ImplicitParamDecl TaskPrivatesArg( 4739 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4740 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4741 ImplicitParamDecl::Other); 4742 Args.push_back(&TaskPrivatesArg); 4743 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4744 unsigned Counter = 1; 4745 for (const Expr *E : PrivateVars) { 4746 Args.push_back(ImplicitParamDecl::Create( 4747 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4748 C.getPointerType(C.getPointerType(E->getType())) 4749 .withConst() 4750 .withRestrict(), 4751 ImplicitParamDecl::Other)); 4752 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4753 PrivateVarsPos[VD] = Counter; 4754 ++Counter; 4755 } 4756 for (const Expr *E : FirstprivateVars) { 4757 Args.push_back(ImplicitParamDecl::Create( 4758 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4759 C.getPointerType(C.getPointerType(E->getType())) 4760 .withConst() 4761 .withRestrict(), 4762 ImplicitParamDecl::Other)); 4763 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4764 PrivateVarsPos[VD] = Counter; 4765 ++Counter; 4766 } 4767 for (const Expr *E : LastprivateVars) { 4768 Args.push_back(ImplicitParamDecl::Create( 4769 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4770 C.getPointerType(C.getPointerType(E->getType())) 4771 .withConst() 4772 .withRestrict(), 4773 ImplicitParamDecl::Other)); 4774 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4775 
PrivateVarsPos[VD] = Counter; 4776 ++Counter; 4777 } 4778 const auto &TaskPrivatesMapFnInfo = 4779 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4780 llvm::FunctionType *TaskPrivatesMapTy = 4781 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4782 std::string Name = 4783 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4784 auto *TaskPrivatesMap = llvm::Function::Create( 4785 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4786 &CGM.getModule()); 4787 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4788 TaskPrivatesMapFnInfo); 4789 if (CGM.getLangOpts().Optimize) { 4790 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4791 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4792 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4793 } 4794 CodeGenFunction CGF(CGM); 4795 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4796 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4797 4798 // *privi = &.privates.privi; 4799 LValue Base = CGF.EmitLoadOfPointerLValue( 4800 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4801 TaskPrivatesArg.getType()->castAs<PointerType>()); 4802 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4803 Counter = 0; 4804 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4805 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4806 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4807 LValue RefLVal = 4808 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4809 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4810 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4811 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4812 ++Counter; 4813 } 4814 CGF.FinishFunction(); 4815 return TaskPrivatesMap; 4816 } 4817 4818 /// Emit initialization for private variables in task-based directives. 
4819 static void emitPrivatesInit(CodeGenFunction &CGF, 4820 const OMPExecutableDirective &D, 4821 Address KmpTaskSharedsPtr, LValue TDBase, 4822 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4823 QualType SharedsTy, QualType SharedsPtrTy, 4824 const OMPTaskDataTy &Data, 4825 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4826 ASTContext &C = CGF.getContext(); 4827 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4828 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4829 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4830 ? OMPD_taskloop 4831 : OMPD_task; 4832 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4833 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4834 LValue SrcBase; 4835 bool IsTargetTask = 4836 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4837 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4838 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4839 // PointersArray and SizesArray. The original variables for these arrays are 4840 // not captured and we get their addresses explicitly. 
4841 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4842 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4843 SrcBase = CGF.MakeAddrLValue( 4844 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4845 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4846 SharedsTy); 4847 } 4848 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4849 for (const PrivateDataTy &Pair : Privates) { 4850 const VarDecl *VD = Pair.second.PrivateCopy; 4851 const Expr *Init = VD->getAnyInitializer(); 4852 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4853 !CGF.isTrivialInitializer(Init)))) { 4854 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4855 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4856 const VarDecl *OriginalVD = Pair.second.Original; 4857 // Check if the variable is the target-based BasePointersArray, 4858 // PointersArray or SizesArray. 4859 LValue SharedRefLValue; 4860 QualType Type = PrivateLValue.getType(); 4861 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4862 if (IsTargetTask && !SharedField) { 4863 assert(isa<ImplicitParamDecl>(OriginalVD) && 4864 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4865 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4866 ->getNumParams() == 0 && 4867 isa<TranslationUnitDecl>( 4868 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4869 ->getDeclContext()) && 4870 "Expected artificial target data variable."); 4871 SharedRefLValue = 4872 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4873 } else { 4874 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4875 SharedRefLValue = CGF.MakeAddrLValue( 4876 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 4877 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4878 SharedRefLValue.getTBAAInfo()); 4879 } 4880 if (Type->isArrayType()) { 4881 // Initialize firstprivate array. 
4882 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4883 // Perform simple memcpy. 4884 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4885 } else { 4886 // Initialize firstprivate array using element-by-element 4887 // initialization. 4888 CGF.EmitOMPAggregateAssign( 4889 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 4890 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4891 Address SrcElement) { 4892 // Clean up any temporaries needed by the initialization. 4893 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4894 InitScope.addPrivate( 4895 Elem, [SrcElement]() -> Address { return SrcElement; }); 4896 (void)InitScope.Privatize(); 4897 // Emit initialization for single element. 4898 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4899 CGF, &CapturesInfo); 4900 CGF.EmitAnyExprToMem(Init, DestElement, 4901 Init->getType().getQualifiers(), 4902 /*IsInitializer=*/false); 4903 }); 4904 } 4905 } else { 4906 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4907 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 4908 return SharedRefLValue.getAddress(); 4909 }); 4910 (void)InitScope.Privatize(); 4911 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4912 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4913 /*capturedByInit=*/false); 4914 } 4915 } else { 4916 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4917 } 4918 } 4919 ++FI; 4920 } 4921 } 4922 4923 /// Check if duplication function is required for taskloops. 
4924 static bool checkInitIsRequired(CodeGenFunction &CGF, 4925 ArrayRef<PrivateDataTy> Privates) { 4926 bool InitRequired = false; 4927 for (const PrivateDataTy &Pair : Privates) { 4928 const VarDecl *VD = Pair.second.PrivateCopy; 4929 const Expr *Init = VD->getAnyInitializer(); 4930 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4931 !CGF.isTrivialInitializer(Init)); 4932 if (InitRequired) 4933 break; 4934 } 4935 return InitRequired; 4936 } 4937 4938 4939 /// Emit task_dup function (for initialization of 4940 /// private/firstprivate/lastprivate vars and last_iter flag) 4941 /// \code 4942 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4943 /// lastpriv) { 4944 /// // setup lastprivate flag 4945 /// task_dst->last = lastpriv; 4946 /// // could be constructor calls here... 4947 /// } 4948 /// \endcode 4949 static llvm::Value * 4950 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4951 const OMPExecutableDirective &D, 4952 QualType KmpTaskTWithPrivatesPtrQTy, 4953 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4954 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4955 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4956 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4957 ASTContext &C = CGM.getContext(); 4958 FunctionArgList Args; 4959 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4960 KmpTaskTWithPrivatesPtrQTy, 4961 ImplicitParamDecl::Other); 4962 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4963 KmpTaskTWithPrivatesPtrQTy, 4964 ImplicitParamDecl::Other); 4965 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4966 ImplicitParamDecl::Other); 4967 Args.push_back(&DstArg); 4968 Args.push_back(&SrcArg); 4969 Args.push_back(&LastprivArg); 4970 const auto &TaskDupFnInfo = 4971 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4972 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4973 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4974 auto *TaskDup = llvm::Function::Create( 4975 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4976 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4977 TaskDup->setDoesNotRecurse(); 4978 CodeGenFunction CGF(CGM); 4979 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4980 Loc); 4981 4982 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4983 CGF.GetAddrOfLocalVar(&DstArg), 4984 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4985 // task_dst->liter = lastpriv; 4986 if (WithLastIter) { 4987 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4988 LValue Base = CGF.EmitLValueForField( 4989 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4990 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4991 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4992 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4993 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4994 } 4995 4996 // Emit initial values for private copies (if any). 4997 assert(!Privates.empty()); 4998 Address KmpTaskSharedsPtr = Address::invalid(); 4999 if (!Data.FirstprivateVars.empty()) { 5000 LValue TDBase = CGF.EmitLoadOfPointerLValue( 5001 CGF.GetAddrOfLocalVar(&SrcArg), 5002 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 5003 LValue Base = CGF.EmitLValueForField( 5004 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5005 KmpTaskSharedsPtr = Address( 5006 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 5007 Base, *std::next(KmpTaskTQTyRD->field_begin(), 5008 KmpTaskTShareds)), 5009 Loc), 5010 CGF.getNaturalTypeAlignment(SharedsTy)); 5011 } 5012 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 5013 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 5014 CGF.FinishFunction(); 5015 return TaskDup; 5016 } 5017 5018 /// Checks if destructor function is required to be generated. 
5019 /// \return true if cleanups are required, false otherwise. 5020 static bool 5021 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 5022 bool NeedsCleanup = false; 5023 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 5024 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 5025 for (const FieldDecl *FD : PrivateRD->fields()) { 5026 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 5027 if (NeedsCleanup) 5028 break; 5029 } 5030 return NeedsCleanup; 5031 } 5032 5033 CGOpenMPRuntime::TaskResultTy 5034 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 5035 const OMPExecutableDirective &D, 5036 llvm::Function *TaskFunction, QualType SharedsTy, 5037 Address Shareds, const OMPTaskDataTy &Data) { 5038 ASTContext &C = CGM.getContext(); 5039 llvm::SmallVector<PrivateDataTy, 4> Privates; 5040 // Aggregate privates and sort them by the alignment. 5041 auto I = Data.PrivateCopies.begin(); 5042 for (const Expr *E : Data.PrivateVars) { 5043 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5044 Privates.emplace_back( 5045 C.getDeclAlign(VD), 5046 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5047 /*PrivateElemInit=*/nullptr)); 5048 ++I; 5049 } 5050 I = Data.FirstprivateCopies.begin(); 5051 auto IElemInitRef = Data.FirstprivateInits.begin(); 5052 for (const Expr *E : Data.FirstprivateVars) { 5053 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5054 Privates.emplace_back( 5055 C.getDeclAlign(VD), 5056 PrivateHelpersTy( 5057 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5058 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 5059 ++I; 5060 ++IElemInitRef; 5061 } 5062 I = Data.LastprivateCopies.begin(); 5063 for (const Expr *E : Data.LastprivateVars) { 5064 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5065 Privates.emplace_back( 5066 C.getDeclAlign(VD), 5067 PrivateHelpersTy(VD, 
cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5068 /*PrivateElemInit=*/nullptr)); 5069 ++I; 5070 } 5071 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 5072 return L.first > R.first; 5073 }); 5074 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 5075 // Build type kmp_routine_entry_t (if not built yet). 5076 emitKmpRoutineEntryT(KmpInt32Ty); 5077 // Build type kmp_task_t (if not built yet). 5078 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 5079 if (SavedKmpTaskloopTQTy.isNull()) { 5080 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5081 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5082 } 5083 KmpTaskTQTy = SavedKmpTaskloopTQTy; 5084 } else { 5085 assert((D.getDirectiveKind() == OMPD_task || 5086 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 5087 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 5088 "Expected taskloop, task or target directive"); 5089 if (SavedKmpTaskTQTy.isNull()) { 5090 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5091 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5092 } 5093 KmpTaskTQTy = SavedKmpTaskTQTy; 5094 } 5095 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5096 // Build particular struct kmp_task_t for the given task. 
5097 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5098 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5099 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5100 QualType KmpTaskTWithPrivatesPtrQTy = 5101 C.getPointerType(KmpTaskTWithPrivatesQTy); 5102 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5103 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5104 KmpTaskTWithPrivatesTy->getPointerTo(); 5105 llvm::Value *KmpTaskTWithPrivatesTySize = 5106 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5107 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5108 5109 // Emit initial values for private copies (if any). 5110 llvm::Value *TaskPrivatesMap = nullptr; 5111 llvm::Type *TaskPrivatesMapTy = 5112 std::next(TaskFunction->arg_begin(), 3)->getType(); 5113 if (!Privates.empty()) { 5114 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5115 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5116 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5117 FI->getType(), Privates); 5118 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5119 TaskPrivatesMap, TaskPrivatesMapTy); 5120 } else { 5121 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5122 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5123 } 5124 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5125 // kmp_task_t *tt); 5126 llvm::Function *TaskEntry = emitProxyTaskFunction( 5127 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5128 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5129 TaskPrivatesMap); 5130 5131 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5132 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5133 // kmp_routine_entry_t *task_entry); 5134 // Task flags. 
Format is taken from 5135 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5136 // description of kmp_tasking_flags struct. 5137 enum { 5138 TiedFlag = 0x1, 5139 FinalFlag = 0x2, 5140 DestructorsFlag = 0x8, 5141 PriorityFlag = 0x20 5142 }; 5143 unsigned Flags = Data.Tied ? TiedFlag : 0; 5144 bool NeedsCleanup = false; 5145 if (!Privates.empty()) { 5146 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5147 if (NeedsCleanup) 5148 Flags = Flags | DestructorsFlag; 5149 } 5150 if (Data.Priority.getInt()) 5151 Flags = Flags | PriorityFlag; 5152 llvm::Value *TaskFlags = 5153 Data.Final.getPointer() 5154 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 5155 CGF.Builder.getInt32(FinalFlag), 5156 CGF.Builder.getInt32(/*C=*/0)) 5157 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5158 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5159 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5160 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 5161 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 5162 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5163 TaskEntry, KmpRoutineEntryPtrTy)}; 5164 llvm::Value *NewTask; 5165 if (D.hasClausesOfKind<OMPNowaitClause>()) { 5166 // Check if we have any device clause associated with the directive. 5167 const Expr *Device = nullptr; 5168 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 5169 Device = C->getDevice(); 5170 // Emit device ID if any otherwise use default value. 
5171 llvm::Value *DeviceID; 5172 if (Device) 5173 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5174 CGF.Int64Ty, /*isSigned=*/true); 5175 else 5176 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 5177 AllocArgs.push_back(DeviceID); 5178 NewTask = CGF.EmitRuntimeCall( 5179 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); 5180 } else { 5181 NewTask = CGF.EmitRuntimeCall( 5182 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5183 } 5184 llvm::Value *NewTaskNewTaskTTy = 5185 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5186 NewTask, KmpTaskTWithPrivatesPtrTy); 5187 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5188 KmpTaskTWithPrivatesQTy); 5189 LValue TDBase = 5190 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5191 // Fill the data in the resulting kmp_task_t record. 5192 // Copy shareds if there are any. 5193 Address KmpTaskSharedsPtr = Address::invalid(); 5194 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5195 KmpTaskSharedsPtr = 5196 Address(CGF.EmitLoadOfScalar( 5197 CGF.EmitLValueForField( 5198 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5199 KmpTaskTShareds)), 5200 Loc), 5201 CGF.getNaturalTypeAlignment(SharedsTy)); 5202 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5203 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5204 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5205 } 5206 // Emit initial values for private copies (if any). 
5207 TaskResultTy Result; 5208 if (!Privates.empty()) { 5209 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5210 SharedsTy, SharedsPtrTy, Data, Privates, 5211 /*ForDup=*/false); 5212 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5213 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5214 Result.TaskDupFn = emitTaskDupFunction( 5215 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5216 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5217 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5218 } 5219 } 5220 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5221 enum { Priority = 0, Destructors = 1 }; 5222 // Provide pointer to function with destructors for privates. 5223 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5224 const RecordDecl *KmpCmplrdataUD = 5225 (*FI)->getType()->getAsUnionType()->getDecl(); 5226 if (NeedsCleanup) { 5227 llvm::Value *DestructorFn = emitDestructorsFunction( 5228 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5229 KmpTaskTWithPrivatesQTy); 5230 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5231 LValue DestructorsLV = CGF.EmitLValueForField( 5232 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5233 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5234 DestructorFn, KmpRoutineEntryPtrTy), 5235 DestructorsLV); 5236 } 5237 // Set priority. 
5238 if (Data.Priority.getInt()) { 5239 LValue Data2LV = CGF.EmitLValueForField( 5240 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5241 LValue PriorityLV = CGF.EmitLValueForField( 5242 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5243 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5244 } 5245 Result.NewTask = NewTask; 5246 Result.TaskEntry = TaskEntry; 5247 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5248 Result.TDBase = TDBase; 5249 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5250 return Result; 5251 } 5252 5253 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5254 const OMPExecutableDirective &D, 5255 llvm::Function *TaskFunction, 5256 QualType SharedsTy, Address Shareds, 5257 const Expr *IfCond, 5258 const OMPTaskDataTy &Data) { 5259 if (!CGF.HaveInsertPoint()) 5260 return; 5261 5262 TaskResultTy Result = 5263 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5264 llvm::Value *NewTask = Result.NewTask; 5265 llvm::Function *TaskEntry = Result.TaskEntry; 5266 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5267 LValue TDBase = Result.TDBase; 5268 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5269 ASTContext &C = CGM.getContext(); 5270 // Process list of dependences. 5271 Address DependenciesArray = Address::invalid(); 5272 unsigned NumDependencies = Data.Dependences.size(); 5273 if (NumDependencies) { 5274 // Dependence kind for RTL. 
5275 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5276 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5277 RecordDecl *KmpDependInfoRD; 5278 QualType FlagsTy = 5279 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5280 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5281 if (KmpDependInfoTy.isNull()) { 5282 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5283 KmpDependInfoRD->startDefinition(); 5284 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5285 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5286 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5287 KmpDependInfoRD->completeDefinition(); 5288 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5289 } else { 5290 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5291 } 5292 // Define type kmp_depend_info[<Dependences.size()>]; 5293 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5294 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5295 ArrayType::Normal, /*IndexTypeQuals=*/0); 5296 // kmp_depend_info[<Dependences.size()>] deps; 5297 DependenciesArray = 5298 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5299 for (unsigned I = 0; I < NumDependencies; ++I) { 5300 const Expr *E = Data.Dependences[I].second; 5301 LValue Addr = CGF.EmitLValue(E); 5302 llvm::Value *Size; 5303 QualType Ty = E->getType(); 5304 if (const auto *ASE = 5305 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5306 LValue UpAddrLVal = 5307 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 5308 llvm::Value *UpAddr = 5309 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 5310 llvm::Value *LowIntPtr = 5311 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 5312 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5313 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5314 } else { 5315 Size = CGF.getTypeSize(Ty); 5316 
} 5317 LValue Base = CGF.MakeAddrLValue( 5318 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5319 KmpDependInfoTy); 5320 // deps[i].base_addr = &<Dependences[i].second>; 5321 LValue BaseAddrLVal = CGF.EmitLValueForField( 5322 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5323 CGF.EmitStoreOfScalar( 5324 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 5325 BaseAddrLVal); 5326 // deps[i].len = sizeof(<Dependences[i].second>); 5327 LValue LenLVal = CGF.EmitLValueForField( 5328 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5329 CGF.EmitStoreOfScalar(Size, LenLVal); 5330 // deps[i].flags = <Dependences[i].first>; 5331 RTLDependenceKindTy DepKind; 5332 switch (Data.Dependences[I].first) { 5333 case OMPC_DEPEND_in: 5334 DepKind = DepIn; 5335 break; 5336 // Out and InOut dependencies must use the same code. 5337 case OMPC_DEPEND_out: 5338 case OMPC_DEPEND_inout: 5339 DepKind = DepInOut; 5340 break; 5341 case OMPC_DEPEND_mutexinoutset: 5342 DepKind = DepMutexInOutSet; 5343 break; 5344 case OMPC_DEPEND_source: 5345 case OMPC_DEPEND_sink: 5346 case OMPC_DEPEND_unknown: 5347 llvm_unreachable("Unknown task dependence type"); 5348 } 5349 LValue FlagsLVal = CGF.EmitLValueForField( 5350 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5351 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5352 FlagsLVal); 5353 } 5354 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5355 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); 5356 } 5357 5358 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5359 // libcall. 
5360 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5361 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5362 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5363 // list is not empty 5364 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5365 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5366 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5367 llvm::Value *DepTaskArgs[7]; 5368 if (NumDependencies) { 5369 DepTaskArgs[0] = UpLoc; 5370 DepTaskArgs[1] = ThreadID; 5371 DepTaskArgs[2] = NewTask; 5372 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 5373 DepTaskArgs[4] = DependenciesArray.getPointer(); 5374 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5375 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5376 } 5377 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 5378 &TaskArgs, 5379 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5380 if (!Data.Tied) { 5381 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5382 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5383 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5384 } 5385 if (NumDependencies) { 5386 CGF.EmitRuntimeCall( 5387 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5388 } else { 5389 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5390 TaskArgs); 5391 } 5392 // Check if parent region is untied and build return for untied task; 5393 if (auto *Region = 5394 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5395 Region->emitUntiedSwitch(CGF); 5396 }; 5397 5398 llvm::Value *DepWaitTaskArgs[6]; 5399 if (NumDependencies) { 5400 DepWaitTaskArgs[0] = UpLoc; 5401 DepWaitTaskArgs[1] = ThreadID; 5402 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 5403 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5404 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5405 
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit the runtime call that launches a 'taskloop' region.
///
/// The task is allocated and initialized through emitTaskInit(). The
/// initializers of the directive's lower bound, upper bound and stride helper
/// variables are then evaluated directly into the matching fields of the task
/// structure, the task reduction data (or null) is stored, and finally
/// __kmpc_taskloop() is called. Nothing is emitted if \p CGF has no insertion
/// point.
/// \param CGF Function the code is emitted into.
/// \param Loc Location used for the thread id lookup, the ident_t argument and
/// the load of the stride.
/// \param D Loop directive providing the lower bound, upper bound and stride
/// variables.
/// \param TaskFunction Forwarded to emitTaskInit().
/// \param SharedsTy Forwarded to emitTaskInit().
/// \param Shareds Forwarded to emitTaskInit().
/// \param IfCond Optional condition; when it is null the 'if_val' argument of
/// the runtime call is the constant 1.
/// \param Data Task data; Data.Reductions and Data.Schedule are read here.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: the 'if' condition converted to int, or 1 when there is no
  // condition.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Evaluate the initializer of the lower bound variable into the lower bound
  // field of the task structure.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values passed as the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  // Arguments in the order of the __kmpc_taskloop signature quoted above. The
  // bounds are passed by pointer to the task structure fields, the stride by
  // value. 'sched' is NoSchedule when Data.Schedule carries no value;
  // otherwise its integer flag selects NumTasks (set) or Grainsize (clear),
  // and the value itself is passed, cast to unsigned 64-bit, as the following
  // argument.
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr Passed through unchanged to \p RedOpGen.
/// \param EExpr Passed through unchanged to \p RedOpGen.
/// \param UpExpr Passed through unchanged to \p RedOpGen.
5538 static void EmitOMPAggregateReduction( 5539 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5540 const VarDecl *RHSVar, 5541 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5542 const Expr *, const Expr *)> &RedOpGen, 5543 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5544 const Expr *UpExpr = nullptr) { 5545 // Perform element-by-element initialization. 5546 QualType ElementTy; 5547 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5548 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5549 5550 // Drill down to the base element type on both arrays. 5551 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5552 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5553 5554 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5555 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5556 // Cast from pointer to array type to pointer to single element. 5557 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5558 // The basic structure here is a while-do loop. 5559 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5560 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5561 llvm::Value *IsEmpty = 5562 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5563 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5564 5565 // Enter the loop body, making that address the current address. 
5566 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5567 CGF.EmitBlock(BodyBB); 5568 5569 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5570 5571 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5572 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5573 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5574 Address RHSElementCurrent = 5575 Address(RHSElementPHI, 5576 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5577 5578 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5579 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5580 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5581 Address LHSElementCurrent = 5582 Address(LHSElementPHI, 5583 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5584 5585 // Emit copy. 5586 CodeGenFunction::OMPPrivateScope Scope(CGF); 5587 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5588 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5589 Scope.Privatize(); 5590 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5591 Scope.ForceCleanup(); 5592 5593 // Shift the address forward by one element. 5594 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5595 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5596 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5597 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5598 // Check whether we've reached the end. 5599 llvm::Value *Done = 5600 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5601 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5602 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5603 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5604 5605 // Done. 5606 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5607 } 5608 5609 /// Emit reduction combiner. 
/// If the combiner is a simple expression, emit it as is; otherwise consider
/// it as combiner of UDR decl and emit it as a call of UDR combiner function.
/// \param CGF Function the combiner is emitted into.
/// \param ReductionOp Reduction operation expression to emit; its result is
/// ignored.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize a call whose callee is an opaque value that refers to an
  // OMPDeclareReductionDecl.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          // Bind the opaque callee to the first function of the pair returned
          // for this declaration while the call expression is emitted.
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emit the internal function 'void reduction_func(void *LHSArg, void
/// *RHSArg)' that combines all reduction items.
///
/// Both arguments are cast to \p ArgsType and treated as arrays of pointers.
/// The variables referenced by \p LHSExprs / \p RHSExprs are remapped to the
/// corresponding array entries before the reduction operations are emitted.
/// For a variably modified private type the next array entry is read as the
/// array size.
/// \param Loc Location used for the generated function and its parameters.
/// \param ArgsType LLVM type both arguments are cast to.
/// \param Privates Private copies; their types decide whether an item is
/// handled as an array and whether a size entry follows it.
/// \param LHSExprs DeclRefExprs naming the left-hand side variables.
/// \param RHSExprs DeclRefExprs naming the right-hand side variables.
/// \param ReductionOps Reduction operations, one per item.
/// \return The generated function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the position in the argument arrays. It can run ahead of I because
  // a variably modified item occupies an extra entry for its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The size is stored in the array as a pointer-sized entry; convert it
      // back to an integer and bind it to the VLA size expression.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit the reduction operations with the remapped variables.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner for a single reduction item.
///
/// If \p PrivateRef has array type, the combiner is emitted once per element
/// through EmitOMPAggregateReduction(); otherwise it is emitted once.
/// \param CGF Function the combiner is emitted into.
/// \param ReductionOp Reduction operation expression.
/// \param PrivateRef Expression whose type selects the array or scalar path.
/// \param LHS Reference to the left-hand side variable (used for arrays only).
/// \param RHS Reference to the right-hand side variable (used for arrays
/// only).
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emit the code that finalizes a reduction.
///
/// With Options.SimpleReduction only the combiners are emitted inline.
/// Otherwise a list of pointers to the reduction items is built, a reduction
/// function is generated, __kmpc_reduce{_nowait} is called and a switch over
/// its result is emitted (see the pseudo code in the body). Nothing is
/// emitted if \p CGF has no insertion point.
/// \param CGF Function the code is emitted into.
/// \param Loc Location used for the runtime calls.
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs DeclRefExprs for the left-hand sides of the operations.
/// \param RHSExprs DeclRefExprs for the right-hand sides of the operations.
/// \param ReductionOps Reduction operations, one per item.
/// \param Options Options.WithNowait selects the '_nowait' runtime entry
/// points; Options.SimpleReduction selects the inline-only form.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the position in RedList; it advances an extra step for every
  // variably modified item because its size is stored right after it.
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action is constructed without an enter callee; only the end-reduce
  // runtime function is supplied as the exit callee.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose 'x = <update>' into the updated lvalue (XExpr), the update
      // expression (UpExpr) and, when the update is a binary operation, its
      // right operand (EExpr) and opcode (BO).
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: the 'BO' declared in the condition below shadows the opcode
      // variable above inside this 'if' statement only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback passed last evaluates UpExpr with VD remapped to a
          // temporary that holds the value it is given.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 6048 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 6049 const Expr *Ref) { 6050 SmallString<256> Buffer; 6051 llvm::raw_svector_ostream Out(Buffer); 6052 const clang::DeclRefExpr *DE; 6053 const VarDecl *D = ::getBaseDecl(Ref, DE); 6054 if (!D) 6055 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 6056 D = D->getCanonicalDecl(); 6057 std::string Name = CGM.getOpenMPRuntime().getName( 6058 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 6059 Out << Prefix << Name << "_" 6060 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 6061 return Out.str(); 6062 } 6063 6064 /// Emits reduction initializer function: 6065 /// \code 6066 /// void @.red_init(void* %arg) { 6067 /// %0 = bitcast void* %arg to <type>* 6068 /// store <type> <init>, <type>* %0 6069 /// ret void 6070 /// } 6071 /// \endcode 6072 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6073 SourceLocation Loc, 6074 ReductionCodeGen &RCG, unsigned N) { 6075 ASTContext &C = CGM.getContext(); 6076 FunctionArgList Args; 6077 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6078 ImplicitParamDecl::Other); 6079 Args.emplace_back(&Param); 6080 const auto &FnInfo = 6081 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6082 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6083 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6084 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6085 Name, &CGM.getModule()); 6086 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6087 Fn->setDoesNotRecurse(); 6088 CodeGenFunction CGF(CGM); 6089 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6090 Address PrivateAddr = CGF.EmitLoadOfPointer( 6091 CGF.GetAddrOfLocalVar(&Param), 6092 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6093 llvm::Value *Size = nullptr; 6094 
// If the size of the reduction item is non-constant, load it from global 6095 // threadprivate variable. 6096 if (RCG.getSizes(N).second) { 6097 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6098 CGF, CGM.getContext().getSizeType(), 6099 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6100 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6101 CGM.getContext().getSizeType(), Loc); 6102 } 6103 RCG.emitAggregateType(CGF, N, Size); 6104 LValue SharedLVal; 6105 // If initializer uses initializer from declare reduction construct, emit a 6106 // pointer to the address of the original reduction item (reuired by reduction 6107 // initializer) 6108 if (RCG.usesReductionInitializer(N)) { 6109 Address SharedAddr = 6110 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6111 CGF, CGM.getContext().VoidPtrTy, 6112 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6113 SharedAddr = CGF.EmitLoadOfPointer( 6114 SharedAddr, 6115 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 6116 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 6117 } else { 6118 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 6119 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 6120 CGM.getContext().VoidPtrTy); 6121 } 6122 // Emit the initializer: 6123 // %0 = bitcast void* %arg to <type>* 6124 // store <type> <init>, <type>* %0 6125 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 6126 [](CodeGenFunction &) { return false; }); 6127 CGF.FinishFunction(); 6128 return Fn; 6129 } 6130 6131 /// Emits reduction combiner function: 6132 /// \code 6133 /// void @.red_comb(void* %arg0, void* %arg1) { 6134 /// %lhs = bitcast void* %arg0 to <type>* 6135 /// %rhs = bitcast void* %arg1 to <type>* 6136 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6137 /// store <type> %2, <type>* %lhs 6138 /// ret void 6139 /// } 6140 /// \endcode 6141 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 6142 
SourceLocation Loc, 6143 ReductionCodeGen &RCG, unsigned N, 6144 const Expr *ReductionOp, 6145 const Expr *LHS, const Expr *RHS, 6146 const Expr *PrivateRef) { 6147 ASTContext &C = CGM.getContext(); 6148 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6149 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6150 FunctionArgList Args; 6151 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6152 C.VoidPtrTy, ImplicitParamDecl::Other); 6153 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6154 ImplicitParamDecl::Other); 6155 Args.emplace_back(&ParamInOut); 6156 Args.emplace_back(&ParamIn); 6157 const auto &FnInfo = 6158 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6159 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6160 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6161 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6162 Name, &CGM.getModule()); 6163 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6164 Fn->setDoesNotRecurse(); 6165 CodeGenFunction CGF(CGM); 6166 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6167 llvm::Value *Size = nullptr; 6168 // If the size of the reduction item is non-constant, load it from global 6169 // threadprivate variable. 6170 if (RCG.getSizes(N).second) { 6171 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6172 CGF, CGM.getContext().getSizeType(), 6173 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6174 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6175 CGM.getContext().getSizeType(), Loc); 6176 } 6177 RCG.emitAggregateType(CGF, N, Size); 6178 // Remap lhs and rhs variables to the addresses of the function arguments. 
6179 // %lhs = bitcast void* %arg0 to <type>* 6180 // %rhs = bitcast void* %arg1 to <type>* 6181 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6182 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6183 // Pull out the pointer to the variable. 6184 Address PtrAddr = CGF.EmitLoadOfPointer( 6185 CGF.GetAddrOfLocalVar(&ParamInOut), 6186 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6187 return CGF.Builder.CreateElementBitCast( 6188 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6189 }); 6190 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6191 // Pull out the pointer to the variable. 6192 Address PtrAddr = CGF.EmitLoadOfPointer( 6193 CGF.GetAddrOfLocalVar(&ParamIn), 6194 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6195 return CGF.Builder.CreateElementBitCast( 6196 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6197 }); 6198 PrivateScope.Privatize(); 6199 // Emit the combiner body: 6200 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6201 // store <type> %2, <type>* %lhs 6202 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6203 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6204 cast<DeclRefExpr>(RHS)); 6205 CGF.FinishFunction(); 6206 return Fn; 6207 } 6208 6209 /// Emits reduction finalizer function: 6210 /// \code 6211 /// void @.red_fini(void* %arg) { 6212 /// %0 = bitcast void* %arg to <type>* 6213 /// <destroy>(<type>* %0) 6214 /// ret void 6215 /// } 6216 /// \endcode 6217 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6218 SourceLocation Loc, 6219 ReductionCodeGen &RCG, unsigned N) { 6220 if (!RCG.needCleanups(N)) 6221 return nullptr; 6222 ASTContext &C = CGM.getContext(); 6223 FunctionArgList Args; 6224 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6225 ImplicitParamDecl::Other); 6226 Args.emplace_back(&Param); 6227 const auto &FnInfo = 6228 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6229 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 6230 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6231 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6232 Name, &CGM.getModule()); 6233 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6234 Fn->setDoesNotRecurse(); 6235 CodeGenFunction CGF(CGM); 6236 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6237 Address PrivateAddr = CGF.EmitLoadOfPointer( 6238 CGF.GetAddrOfLocalVar(&Param), 6239 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6240 llvm::Value *Size = nullptr; 6241 // If the size of the reduction item is non-constant, load it from global 6242 // threadprivate variable. 6243 if (RCG.getSizes(N).second) { 6244 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6245 CGF, CGM.getContext().getSizeType(), 6246 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6247 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6248 CGM.getContext().getSizeType(), Loc); 6249 } 6250 RCG.emitAggregateType(CGF, N, Size); 6251 // Emit the finalizer body: 6252 // <destroy>(<type>* %0) 6253 RCG.emitCleanups(CGF, N, PrivateAddr); 6254 CGF.FinishFunction(); 6255 return Fn; 6256 } 6257 6258 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6259 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6260 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6261 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6262 return nullptr; 6263 6264 // Build typedef struct: 6265 // kmp_task_red_input { 6266 // void *reduce_shar; // shared reduction item 6267 // size_t reduce_size; // size of data item 6268 // void *reduce_init; // data initialization routine 6269 // void *reduce_fini; // data finalization routine 6270 // void *reduce_comb; // data combiner routine 6271 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6272 // } kmp_task_red_input_t; 6273 
ASTContext &C = CGM.getContext(); 6274 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6275 RD->startDefinition(); 6276 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6277 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6278 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6279 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6280 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6281 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6282 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6283 RD->completeDefinition(); 6284 QualType RDType = C.getRecordType(RD); 6285 unsigned Size = Data.ReductionVars.size(); 6286 llvm::APInt ArraySize(/*numBits=*/64, Size); 6287 QualType ArrayRDType = C.getConstantArrayType( 6288 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 6289 // kmp_task_red_input_t .rd_input.[Size]; 6290 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6291 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6292 Data.ReductionOps); 6293 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6294 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6295 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6296 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6297 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6298 TaskRedInput.getPointer(), Idxs, 6299 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6300 ".rd_input.gep."); 6301 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6302 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6303 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6304 RCG.emitSharedLValue(CGF, Cnt); 6305 llvm::Value *CastedShared = 6306 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 6307 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6308 RCG.emitAggregateType(CGF, Cnt); 6309 llvm::Value *SizeValInChars; 6310 llvm::Value 
*SizeVal; 6311 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6312 // We use delayed creation/initialization for VLAs, array sections and 6313 // custom reduction initializations. It is required because runtime does not 6314 // provide the way to pass the sizes of VLAs/array sections to 6315 // initializer/combiner/finalizer functions and does not pass the pointer to 6316 // original reduction item to the initializer. Instead threadprivate global 6317 // variables are used to store these values and use them in the functions. 6318 bool DelayedCreation = !!SizeVal; 6319 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6320 /*isSigned=*/false); 6321 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6322 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6323 // ElemLVal.reduce_init = init; 6324 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6325 llvm::Value *InitAddr = 6326 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6327 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6328 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 6329 // ElemLVal.reduce_fini = fini; 6330 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6331 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6332 llvm::Value *FiniAddr = Fini 6333 ? 
CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

/// Publish per-item task reduction data through artificial threadprivate
/// variables. For reduction item \p N of \p RCG this stores:
///  - the item's size, cast to size_t, in the variable named after
///    "reduction_size", when RCG.getSizes(N).second is non-null;
///  - the address of the shared (original) item, cast to void*, in the
///    variable named after "reduction", when RCG.usesReductionInitializer(N)
///    is true.
/// \param Loc Not referenced by this function body.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Emit a call to __kmpc_task_reduction_get_th_data and wrap the returned
/// pointer in an Address.
/// \param ReductionsPtr Passed through unchanged as the second runtime
/// argument ('tg' in the prototype below).
/// \param SharedLVal Its pointer, cast to void*, is the third runtime
/// argument; its alignment is reused for the returned Address.
/// \return Address built from the runtime call result.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emit a call to __kmpc_omp_taskwait(loc, gtid). When the current captured
/// statement info is an OpenMP region, its emitUntiedSwitch hook is invoked
/// afterwards. Does nothing if \p CGF has no insertion point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit the body of a directive in place, without outlining. Constructs an
/// InlinedOpenMPRegionRAII from \p CodeGen, \p InnerKind and \p HasCancel and
/// then asks CGF.CapturedStmtInfo to emit the body with a null statement.
/// Does nothing if \p CGF has no insertion point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // NOTE(review): presumably the RAII object installs the inlined region as
  // CGF.CapturedStmtInfo for its lifetime; it is defined outside this view.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Integer codes passed as the 'cncl_kind' argument of the __kmpc_cancel and
/// __kmpc_cancellationpoint calls emitted below.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Translate the directive kind named in a cancel/cancellation point
/// construct into the corresponding RTCancelKind code.
/// OMPD_parallel, OMPD_for and OMPD_sections map to their own codes; anything
/// else is asserted to be OMPD_taskgroup and maps to CancelTaskgroup (so, when
/// assertions are compiled out, every other kind also yields CancelTaskgroup).
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emit code for a 'cancellation point' construct: a call to
/// __kmpc_cancellationpoint followed by a conditional branch out of the
/// enclosing construct when the call returns a non-zero value.
/// Nothing is emitted if \p CGF has no insertion point, if the current
/// captured statement info is not an OpenMP region, or if the region has no
/// cancel and \p CancelRegion is not OMPD_taskgroup.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below to decide whether to leave the
      // construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      // The destination is looked up by the kind of the enclosing region, not
      // by CancelRegion.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emit code for a 'cancel' construct: a call to __kmpc_cancel followed by a
/// conditional branch out of the enclosing construct when the call returns a
/// non-zero value.
/// \param IfCond Optional 'if' clause condition. When present the call
/// sequence is emitted through emitOMPIfClause with an empty else generator;
/// when null it is emitted unconditionally.
/// Nothing is emitted if \p CGF has no insertion point or if the current
/// captured statement info is not an OpenMP region.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Generator for the runtime call and the exit branch. It is shared by the
    // conditional and the unconditional paths below.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below to decide whether to leave the
      // construct.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// Entry point for outlining a target region. Records that a target region
/// has been emitted (HasEmittedTargetRegion) and forwards all arguments to
/// emitTargetOutlinedFunctionHelper.
/// \param ParentName Must be non-empty (asserted).
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Outline the OMPD_target captured statement of \p D into a new function and,
/// when \p IsOffloadEntry is set, create its region ID and register the entry
/// with OffloadEntriesInfoManager.
/// \param [out] OutlinedFn Receives the generated function.
/// \param [out] OutlinedFnID Receives the region ID; it is only assigned when
/// \p IsOffloadEntry is true.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // DeviceID and FileID are printed in hexadecimal, Line in decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The outlined function is generated with a fresh CodeGenFunction that has
  // the target region info installed for the duration of the emission.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also
  // change the linkage of the outlined function (WeakAnyLinkage below) in case
  // we are emitting code for the device, because these functions will be entry
  // points to the device.
  // NOTE(review): this comment previously said "external linkage" while the
  // code sets WeakAnyLinkage - confirm which one is intended.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: the ID is a separate constant i8 global initialized to zero,
    // named from the entry function name plus "region_id".
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression can be skipped when looking for the single
/// meaningful child of a compound statement.
/// \return true when \p E has no side effects (possible effects included) and
/// is either evaluatable as a constant (undefined behavior allowed) or has no
/// non-trivial function calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Find the single significant statement inside \p Body, looking through
/// containers and nested compound statements.
/// Trivial expressions (see isTrivial), asm/null statements, flush, barrier
/// and taskyield directives, and declaration statements whose declarations
/// are all ignorable are skipped.
/// \return The only remaining child, or nullptr when a compound statement has
/// no significant child or has more than one.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Each iteration descends one compound statement level.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement is ignorable only if every declaration in
        // it is: either one of the listed non-variable kinds, or a variable
        // that is constexpr or has a trivial/reference type with no
        // initializer or a trivial one.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': the answer depends on the single directive (if any)
    // nested directly inside the target region.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Nested teams directive with num_teams: emit the clause expression
          // with the inner-expression region info installed and cast it to a
          // signed i32.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams directive without num_teams.
        // NOTE(review): 0 presumably leaves the choice to the runtime -
        // confirm against the offloading runtime interface.
        return Bld.getInt32(0);
      }
      // Nested parallel or simd directive: a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      // Any other nested directive.
      return Bld.getInt32(0);
    }
    // No single nested directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined 'target teams ...': the num_teams clause, if any, is on D
    // itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // Target directives without a teams construct: a single team.
    return Bld.getInt32(1);
  // Every remaining directive kind falls through to the llvm_unreachable
  // after the switch.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Compute the number of threads implied by the single directive nested in
/// the captured statement \p CS.
/// - Nested parallel directive: the num_threads clause value (unsigned cast to
///   i32), limited to \p DefaultThreadLimitVal when that is non-null; without
///   a num_threads clause, \p DefaultThreadLimitVal or 0. An applicable 'if'
///   clause turns the result into <cond> ? <value> : 1, or into constant 1
///   when the condition folds to false.
/// - Nested simd directive: constant 1.
/// - Any other nested directive: \p DefaultThreadLimitVal, which may be null.
/// - No single nested directive: \p DefaultThreadLimitVal or 0.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the first 'if' clause that has no name modifier or the
        // 'parallel' modifier.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to a constant: false means exactly one thread;
            // true needs no runtime select.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Runtime condition: emit the clause's pre-init declarations,
            // then evaluate the condition.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // Declarations marked OMPCaptureNoInit get storage and
                  // cleanups only; no initializer is emitted.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Unsigned minimum of the default thread limit and num_threads.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    // Some other nested directive: hand back the caller's limit unchanged
    // (possibly null).
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': first try the directive nested directly in the region
    // (ThreadLimitVal is still null here).
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Emit the nested directive's thread_limit (pre-init declarations
        // first) and keep it as an unsigned-cast i32.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // A teams directive that is not combined with distribute: step into it
      // and continue with its single nested directive (Dir may become null).
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A distribute directive without simd: look at what is nested in it.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // 'target teams': thread_limit, if any, is on D itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Only a plain nested 'distribute' is looked through here.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // The result of getNumThreads is returned as is, so it may be null.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined directives that include 'parallel': the if, thread_limit and
    // num_threads clauses are all on D itself.
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the first 'if' clause that has no name modifier or the
      // 'parallel' modifier.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Condition folds to false: exactly one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Unsigned minimum of num_threads and thread_limit when both exist,
      // otherwise num_threads alone.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // Every remaining directive kind falls through to the llvm_unreachable
  // after the switch.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
7124 OMP_MAP_RETURN_PARAM = 0x40, 7125 /// This flag signals that the reference being passed is a pointer to 7126 /// private data. 7127 OMP_MAP_PRIVATE = 0x80, 7128 /// Pass the element to the device by value. 7129 OMP_MAP_LITERAL = 0x100, 7130 /// Implicit map 7131 OMP_MAP_IMPLICIT = 0x200, 7132 /// Close is a hint to the runtime to allocate memory close to 7133 /// the target device. 7134 OMP_MAP_CLOSE = 0x400, 7135 /// The 16 MSBs of the flags indicate whether the entry is member of some 7136 /// struct/class. 7137 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7138 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7139 }; 7140 7141 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7142 static unsigned getFlagMemberOffset() { 7143 unsigned Offset = 0; 7144 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7145 Remain = Remain >> 1) 7146 Offset++; 7147 return Offset; 7148 } 7149 7150 /// Class that associates information with a base pointer to be passed to the 7151 /// runtime library. 7152 class BasePointerInfo { 7153 /// The base pointer. 7154 llvm::Value *Ptr = nullptr; 7155 /// The base declaration that refers to this device pointer, or null if 7156 /// there is none. 7157 const ValueDecl *DevPtrDecl = nullptr; 7158 7159 public: 7160 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7161 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7162 llvm::Value *operator*() const { return Ptr; } 7163 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7164 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7165 }; 7166 7167 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7168 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7169 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7170 7171 /// Map between a struct and the its lowest & highest elements which have been 7172 /// mapped. 
7173 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7174 /// HE(FieldIndex, Pointer)} 7175 struct StructRangeInfoTy { 7176 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7177 0, Address::invalid()}; 7178 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7179 0, Address::invalid()}; 7180 Address Base = Address::invalid(); 7181 }; 7182 7183 private: 7184 /// Kind that defines how a device pointer has to be returned. 7185 struct MapInfo { 7186 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7187 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7188 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7189 bool ReturnDevicePointer = false; 7190 bool IsImplicit = false; 7191 7192 MapInfo() = default; 7193 MapInfo( 7194 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7195 OpenMPMapClauseKind MapType, 7196 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7197 bool ReturnDevicePointer, bool IsImplicit) 7198 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7199 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7200 }; 7201 7202 /// If use_device_ptr is used on a pointer which is a struct member and there 7203 /// is no map information about it, then emission of that entry is deferred 7204 /// until the whole struct has been processed. 7205 struct DeferredDevicePtrEntryTy { 7206 const Expr *IE = nullptr; 7207 const ValueDecl *VD = nullptr; 7208 7209 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7210 : IE(IE), VD(VD) {} 7211 }; 7212 7213 /// The target directive from where the mappable clauses were extracted. It 7214 /// is either a executable directive or a user-defined mapper directive. 7215 llvm::PointerUnion<const OMPExecutableDirective *, 7216 const OMPDeclareMapperDecl *> 7217 CurDir; 7218 7219 /// Function the directive is being generated for. 
7220 CodeGenFunction &CGF; 7221 7222 /// Set of all first private variables in the current directive. 7223 /// bool data is set to true if the variable is implicitly marked as 7224 /// firstprivate, false otherwise. 7225 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7226 7227 /// Map between device pointer declarations and their expression components. 7228 /// The key value for declarations in 'this' is null. 7229 llvm::DenseMap< 7230 const ValueDecl *, 7231 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7232 DevPointersMap; 7233 7234 llvm::Value *getExprTypeSize(const Expr *E) const { 7235 QualType ExprTy = E->getType().getCanonicalType(); 7236 7237 // Reference types are ignored for mapping purposes. 7238 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7239 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7240 7241 // Given that an array section is considered a built-in type, we need to 7242 // do the calculation based on the length of the section instead of relying 7243 // on CGF.getTypeSize(E->getType()). 7244 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7245 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7246 OAE->getBase()->IgnoreParenImpCasts()) 7247 .getCanonicalType(); 7248 7249 // If there is no length associated with the expression, that means we 7250 // are using the whole length of the base. 
7251 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 7252 return CGF.getTypeSize(BaseTy); 7253 7254 llvm::Value *ElemSize; 7255 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7256 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7257 } else { 7258 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7259 assert(ATy && "Expecting array type if not a pointer type."); 7260 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7261 } 7262 7263 // If we don't have a length at this point, that is because we have an 7264 // array section with a single element. 7265 if (!OAE->getLength()) 7266 return ElemSize; 7267 7268 llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 7269 LengthVal = 7270 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 7271 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7272 } 7273 return CGF.getTypeSize(ExprTy); 7274 } 7275 7276 /// Return the corresponding bits for a given map clause modifier. Add 7277 /// a flag marking the map as a pointer if requested. Add a flag marking the 7278 /// map as the first one of a series of maps that relate to the same map 7279 /// expression. 7280 OpenMPOffloadMappingFlags getMapTypeBits( 7281 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7282 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7283 OpenMPOffloadMappingFlags Bits = 7284 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7285 switch (MapType) { 7286 case OMPC_MAP_alloc: 7287 case OMPC_MAP_release: 7288 // alloc and release is the default behavior in the runtime library, i.e. 7289 // if we don't pass any bits alloc/release that is what the runtime is 7290 // going to do. Therefore, we don't need to signal anything for these two 7291 // type modifiers. 
7292 break; 7293 case OMPC_MAP_to: 7294 Bits |= OMP_MAP_TO; 7295 break; 7296 case OMPC_MAP_from: 7297 Bits |= OMP_MAP_FROM; 7298 break; 7299 case OMPC_MAP_tofrom: 7300 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7301 break; 7302 case OMPC_MAP_delete: 7303 Bits |= OMP_MAP_DELETE; 7304 break; 7305 case OMPC_MAP_unknown: 7306 llvm_unreachable("Unexpected map type!"); 7307 } 7308 if (AddPtrFlag) 7309 Bits |= OMP_MAP_PTR_AND_OBJ; 7310 if (AddIsTargetParamFlag) 7311 Bits |= OMP_MAP_TARGET_PARAM; 7312 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7313 != MapModifiers.end()) 7314 Bits |= OMP_MAP_ALWAYS; 7315 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7316 != MapModifiers.end()) 7317 Bits |= OMP_MAP_CLOSE; 7318 return Bits; 7319 } 7320 7321 /// Return true if the provided expression is a final array section. A 7322 /// final array section, is one whose length can't be proved to be one. 7323 bool isFinalArraySectionExpression(const Expr *E) const { 7324 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7325 7326 // It is not an array section and therefore not a unity-size one. 7327 if (!OASE) 7328 return false; 7329 7330 // An array section with no colon always refer to a single element. 7331 if (OASE->getColonLoc().isInvalid()) 7332 return false; 7333 7334 const Expr *Length = OASE->getLength(); 7335 7336 // If we don't have a length we have to check if the array has size 1 7337 // for this dimension. Also, we should always expect a length if the 7338 // base type is pointer. 7339 if (!Length) { 7340 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7341 OASE->getBase()->IgnoreParenImpCasts()) 7342 .getCanonicalType(); 7343 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7344 return ATy->getSize().getSExtValue() != 1; 7345 // If we don't have a constant dimension length, we have to consider 7346 // the current section as having any size, so it is not necessarily 7347 // unitary. 
If it happen to be unity size, that's user fault. 7348 return true; 7349 } 7350 7351 // Check if the length evaluates to 1. 7352 Expr::EvalResult Result; 7353 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7354 return true; // Can have more that size 1. 7355 7356 llvm::APSInt ConstLength = Result.Val.getInt(); 7357 return ConstLength.getSExtValue() != 1; 7358 } 7359 7360 /// Generate the base pointers, section pointers, sizes and map type 7361 /// bits for the provided map type, map modifier, and expression components. 7362 /// \a IsFirstComponent should be set to true if the provided set of 7363 /// components is the first associated with a capture. 7364 void generateInfoForComponentList( 7365 OpenMPMapClauseKind MapType, 7366 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7367 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7368 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7369 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7370 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7371 bool IsImplicit, 7372 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7373 OverlappedElements = llvm::None) const { 7374 // The following summarizes what has to be generated for each map and the 7375 // types below. The generated information is expressed in this order: 7376 // base pointer, section pointer, size, flags 7377 // (to add to the ones that come from the map type and modifier). 
7378 // 7379 // double d; 7380 // int i[100]; 7381 // float *p; 7382 // 7383 // struct S1 { 7384 // int i; 7385 // float f[50]; 7386 // } 7387 // struct S2 { 7388 // int i; 7389 // float f[50]; 7390 // S1 s; 7391 // double *p; 7392 // struct S2 *ps; 7393 // } 7394 // S2 s; 7395 // S2 *ps; 7396 // 7397 // map(d) 7398 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7399 // 7400 // map(i) 7401 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7402 // 7403 // map(i[1:23]) 7404 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7405 // 7406 // map(p) 7407 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7408 // 7409 // map(p[1:24]) 7410 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7411 // 7412 // map(s) 7413 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7414 // 7415 // map(s.i) 7416 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7417 // 7418 // map(s.s.f) 7419 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7420 // 7421 // map(s.p) 7422 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7423 // 7424 // map(to: s.p[:22]) 7425 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7426 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7427 // &(s.p), &(s.p[0]), 22*sizeof(double), 7428 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7429 // (*) alloc space for struct members, only this is a target parameter 7430 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7431 // optimizes this entry out, same in the examples below) 7432 // (***) map the pointee (map: to) 7433 // 7434 // map(s.ps) 7435 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7436 // 7437 // map(from: s.ps->s.i) 7438 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7439 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7440 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7441 // 7442 // map(to: s.ps->ps) 7443 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7444 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7445 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7446 // 7447 // map(s.ps->ps->ps) 7448 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7449 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7450 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7451 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7452 // 7453 // map(to: s.ps->ps->s.f[:22]) 7454 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7455 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7456 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7457 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7458 // 7459 // map(ps) 7460 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7461 // 7462 // map(ps->i) 7463 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7464 // 7465 // map(ps->s.f) 7466 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7467 // 7468 // map(from: ps->p) 7469 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7470 // 7471 // map(to: ps->p[:22]) 7472 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7473 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7474 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7475 // 7476 // map(ps->ps) 7477 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7478 // 7479 // map(from: ps->ps->s.i) 7480 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7481 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7482 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7483 // 7484 // map(from: ps->ps->ps) 7485 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7486 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7487 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7488 // 7489 // map(ps->ps->ps->ps) 7490 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7491 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7492 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7493 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7494 // 7495 // map(to: ps->ps->ps->s.f[:22]) 7496 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7497 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7498 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7499 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7500 // 7501 // map(to: s.f[:22]) map(from: s.p[:33]) 7502 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7503 // sizeof(double*) (**), TARGET_PARAM 7504 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7505 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7506 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7507 // (*) allocate contiguous space needed to fit all mapped members even if 7508 // we allocate space for members not mapped (in this example, 7509 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7510 // them as well because they fall between &s.f[0] and &s.p) 7511 // 7512 // map(from: s.f[:22]) map(to: ps->p[:33]) 7513 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7514 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7515 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7516 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7517 // (*) the struct this entry pertains to is the 2nd element in the list of 7518 // arguments, hence MEMBER_OF(2) 7519 // 7520 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7521 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7522 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7523 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7524 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7525 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7526 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7527 // (*) the struct this entry pertains to is the 4th element in the list 7528 // of arguments, hence MEMBER_OF(4) 7529 7530 // Track if the map information being generated is the first for a 
capture. 7531 bool IsCaptureFirstInfo = IsFirstComponentList; 7532 // When the variable is on a declare target link or in a to clause with 7533 // unified memory, a reference is needed to hold the host/device address 7534 // of the variable. 7535 bool RequiresReference = false; 7536 7537 // Scan the components from the base to the complete expression. 7538 auto CI = Components.rbegin(); 7539 auto CE = Components.rend(); 7540 auto I = CI; 7541 7542 // Track if the map information being generated is the first for a list of 7543 // components. 7544 bool IsExpressionFirstInfo = true; 7545 Address BP = Address::invalid(); 7546 const Expr *AssocExpr = I->getAssociatedExpression(); 7547 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7548 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7549 7550 if (isa<MemberExpr>(AssocExpr)) { 7551 // The base is the 'this' pointer. The content of the pointer is going 7552 // to be the base of the field being mapped. 7553 BP = CGF.LoadCXXThisAddress(); 7554 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7555 (OASE && 7556 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7557 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7558 } else { 7559 // The base is the reference to the variable. 7560 // BP = &Var. 7561 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7562 if (const auto *VD = 7563 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7564 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7565 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7566 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7567 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7568 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7569 RequiresReference = true; 7570 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7571 } 7572 } 7573 } 7574 7575 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7576 // the last component), the base has to be the pointer itself, not its 7577 // reference. References are ignored for mapping purposes. 7578 QualType Ty = 7579 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7580 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7581 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7582 7583 // We do not need to generate individual map information for the 7584 // pointer, it can be associated with the combined storage. 7585 ++I; 7586 } 7587 } 7588 7589 // Track whether a component of the list should be marked as MEMBER_OF some 7590 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7591 // in a component list should be marked as MEMBER_OF, all subsequent entries 7592 // do not belong to the base struct. E.g. 7593 // struct S2 s; 7594 // s.ps->ps->ps->f[:] 7595 // (1) (2) (3) (4) 7596 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7597 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7598 // is the pointee of ps(2) which is not member of struct s, so it should not 7599 // be marked as such (it is still PTR_AND_OBJ). 7600 // The variable is initialized to false so that PTR_AND_OBJ entries which 7601 // are not struct members are not considered (e.g. array of pointers to 7602 // data). 7603 bool ShouldBeMemberOf = false; 7604 7605 // Variable keeping track of whether or not we have encountered a component 7606 // in the component list which is a member expression. Useful when we have a 7607 // pointer or a final array section, in which case it is the previous 7608 // component in the list which tells us whether we have a member expression. 7609 // E.g. X.f[:] 7610 // While processing the final array section "[:]" it is "f" which tells us 7611 // whether we are dealing with a member of a declared struct. 
7612 const MemberExpr *EncounteredME = nullptr; 7613 7614 for (; I != CE; ++I) { 7615 // If the current component is member of a struct (parent struct) mark it. 7616 if (!EncounteredME) { 7617 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7618 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7619 // as MEMBER_OF the parent struct. 7620 if (EncounteredME) 7621 ShouldBeMemberOf = true; 7622 } 7623 7624 auto Next = std::next(I); 7625 7626 // We need to generate the addresses and sizes if this is the last 7627 // component, if the component is a pointer or if it is an array section 7628 // whose length can't be proved to be one. If this is a pointer, it 7629 // becomes the base address for the following components. 7630 7631 // A final array section, is one whose length can't be proved to be one. 7632 bool IsFinalArraySection = 7633 isFinalArraySectionExpression(I->getAssociatedExpression()); 7634 7635 // Get information on whether the element is a pointer. Have to do a 7636 // special treatment for array sections given that they are built-in 7637 // types. 7638 const auto *OASE = 7639 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7640 bool IsPointer = 7641 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7642 .getCanonicalType() 7643 ->isAnyPointerType()) || 7644 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7645 7646 if (Next == CE || IsPointer || IsFinalArraySection) { 7647 // If this is not the last component, we expect the pointer to be 7648 // associated with an array expression or member expression. 
7649 assert((Next == CE || 7650 isa<MemberExpr>(Next->getAssociatedExpression()) || 7651 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7652 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7653 "Unexpected expression"); 7654 7655 Address LB = 7656 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 7657 7658 // If this component is a pointer inside the base struct then we don't 7659 // need to create any entry for it - it will be combined with the object 7660 // it is pointing to into a single PTR_AND_OBJ entry. 7661 bool IsMemberPointer = 7662 IsPointer && EncounteredME && 7663 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7664 EncounteredME); 7665 if (!OverlappedElements.empty()) { 7666 // Handle base element with the info for overlapped elements. 7667 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7668 assert(Next == CE && 7669 "Expected last element for the overlapped elements."); 7670 assert(!IsPointer && 7671 "Unexpected base element with the pointer type."); 7672 // Mark the whole struct as the struct that requires allocation on the 7673 // device. 7674 PartialStruct.LowestElem = {0, LB}; 7675 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7676 I->getAssociatedExpression()->getType()); 7677 Address HB = CGF.Builder.CreateConstGEP( 7678 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7679 CGF.VoidPtrTy), 7680 TypeSize.getQuantity() - 1); 7681 PartialStruct.HighestElem = { 7682 std::numeric_limits<decltype( 7683 PartialStruct.HighestElem.first)>::max(), 7684 HB}; 7685 PartialStruct.Base = BP; 7686 // Emit data for non-overlapped data. 7687 OpenMPOffloadMappingFlags Flags = 7688 OMP_MAP_MEMBER_OF | 7689 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7690 /*AddPtrFlag=*/false, 7691 /*AddIsTargetParamFlag=*/false); 7692 LB = BP; 7693 llvm::Value *Size = nullptr; 7694 // Do bitcopy of all non-overlapped structure elements. 
7695 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7696 Component : OverlappedElements) { 7697 Address ComponentLB = Address::invalid(); 7698 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7699 Component) { 7700 if (MC.getAssociatedDeclaration()) { 7701 ComponentLB = 7702 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7703 .getAddress(); 7704 Size = CGF.Builder.CreatePtrDiff( 7705 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7706 CGF.EmitCastToVoidPtr(LB.getPointer())); 7707 break; 7708 } 7709 } 7710 BasePointers.push_back(BP.getPointer()); 7711 Pointers.push_back(LB.getPointer()); 7712 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7713 /*isSigned=*/true)); 7714 Types.push_back(Flags); 7715 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7716 } 7717 BasePointers.push_back(BP.getPointer()); 7718 Pointers.push_back(LB.getPointer()); 7719 Size = CGF.Builder.CreatePtrDiff( 7720 CGF.EmitCastToVoidPtr( 7721 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7722 CGF.EmitCastToVoidPtr(LB.getPointer())); 7723 Sizes.push_back( 7724 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7725 Types.push_back(Flags); 7726 break; 7727 } 7728 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7729 if (!IsMemberPointer) { 7730 BasePointers.push_back(BP.getPointer()); 7731 Pointers.push_back(LB.getPointer()); 7732 Sizes.push_back( 7733 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7734 7735 // We need to add a pointer flag for each map that comes from the 7736 // same expression except for the first one. We also need to signal 7737 // this map is the first one that relates with the current capture 7738 // (there is a set of entries for each capture). 
7739 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7740 MapType, MapModifiers, IsImplicit, 7741 !IsExpressionFirstInfo || RequiresReference, 7742 IsCaptureFirstInfo && !RequiresReference); 7743 7744 if (!IsExpressionFirstInfo) { 7745 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7746 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7747 if (IsPointer) 7748 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7749 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7750 7751 if (ShouldBeMemberOf) { 7752 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7753 // should be later updated with the correct value of MEMBER_OF. 7754 Flags |= OMP_MAP_MEMBER_OF; 7755 // From now on, all subsequent PTR_AND_OBJ entries should not be 7756 // marked as MEMBER_OF. 7757 ShouldBeMemberOf = false; 7758 } 7759 } 7760 7761 Types.push_back(Flags); 7762 } 7763 7764 // If we have encountered a member expression so far, keep track of the 7765 // mapped member. If the parent is "*this", then the value declaration 7766 // is nullptr. 7767 if (EncounteredME) { 7768 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7769 unsigned FieldIndex = FD->getFieldIndex(); 7770 7771 // Update info about the lowest and highest elements for this struct 7772 if (!PartialStruct.Base.isValid()) { 7773 PartialStruct.LowestElem = {FieldIndex, LB}; 7774 PartialStruct.HighestElem = {FieldIndex, LB}; 7775 PartialStruct.Base = BP; 7776 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7777 PartialStruct.LowestElem = {FieldIndex, LB}; 7778 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7779 PartialStruct.HighestElem = {FieldIndex, LB}; 7780 } 7781 } 7782 7783 // If we have a final array section, we are done with this expression. 7784 if (IsFinalArraySection) 7785 break; 7786 7787 // The pointer becomes the base for the next element. 
7788 if (Next != CE) 7789 BP = LB; 7790 7791 IsExpressionFirstInfo = false; 7792 IsCaptureFirstInfo = false; 7793 } 7794 } 7795 } 7796 7797 /// Return the adjusted map modifiers if the declaration a capture refers to 7798 /// appears in a first-private clause. This is expected to be used only with 7799 /// directives that start with 'target'. 7800 MappableExprsHandler::OpenMPOffloadMappingFlags 7801 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7802 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7803 7804 // A first private variable captured by reference will use only the 7805 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7806 // declaration is known as first-private in this handler. 7807 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7808 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7809 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7810 return MappableExprsHandler::OMP_MAP_ALWAYS | 7811 MappableExprsHandler::OMP_MAP_TO; 7812 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7813 return MappableExprsHandler::OMP_MAP_TO | 7814 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7815 return MappableExprsHandler::OMP_MAP_PRIVATE | 7816 MappableExprsHandler::OMP_MAP_TO; 7817 } 7818 return MappableExprsHandler::OMP_MAP_TO | 7819 MappableExprsHandler::OMP_MAP_FROM; 7820 } 7821 7822 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7823 // Rotate by getFlagMemberOffset() bits. 7824 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7825 << getFlagMemberOffset()); 7826 } 7827 7828 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7829 OpenMPOffloadMappingFlags MemberOfFlag) { 7830 // If the entry is PTR_AND_OBJ but has not been marked with the special 7831 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7832 // marked as MEMBER_OF. 
7833 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7834 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7835 return; 7836 7837 // Reset the placeholder value to prepare the flag for the assignment of the 7838 // proper MEMBER_OF value. 7839 Flags &= ~OMP_MAP_MEMBER_OF; 7840 Flags |= MemberOfFlag; 7841 } 7842 7843 void getPlainLayout(const CXXRecordDecl *RD, 7844 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7845 bool AsBase) const { 7846 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7847 7848 llvm::StructType *St = 7849 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7850 7851 unsigned NumElements = St->getNumElements(); 7852 llvm::SmallVector< 7853 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7854 RecordLayout(NumElements); 7855 7856 // Fill bases. 7857 for (const auto &I : RD->bases()) { 7858 if (I.isVirtual()) 7859 continue; 7860 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7861 // Ignore empty bases. 7862 if (Base->isEmpty() || CGF.getContext() 7863 .getASTRecordLayout(Base) 7864 .getNonVirtualSize() 7865 .isZero()) 7866 continue; 7867 7868 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7869 RecordLayout[FieldIndex] = Base; 7870 } 7871 // Fill in virtual bases. 7872 for (const auto &I : RD->vbases()) { 7873 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7874 // Ignore empty bases. 7875 if (Base->isEmpty()) 7876 continue; 7877 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7878 if (RecordLayout[FieldIndex]) 7879 continue; 7880 RecordLayout[FieldIndex] = Base; 7881 } 7882 // Fill in all the fields. 7883 assert(!RD->isUnion() && "Unexpected union."); 7884 for (const auto *Field : RD->fields()) { 7885 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7886 // will fill in later.) 
7887 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7888 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7889 RecordLayout[FieldIndex] = Field; 7890 } 7891 } 7892 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7893 &Data : RecordLayout) { 7894 if (Data.isNull()) 7895 continue; 7896 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7897 getPlainLayout(Base, Layout, /*AsBase=*/true); 7898 else 7899 Layout.push_back(Data.get<const FieldDecl *>()); 7900 } 7901 } 7902 7903 public: 7904 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7905 : CurDir(&Dir), CGF(CGF) { 7906 // Extract firstprivate clause information. 7907 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7908 for (const auto *D : C->varlists()) 7909 FirstPrivateDecls.try_emplace( 7910 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7911 // Extract device pointer clause information. 7912 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7913 for (auto L : C->component_lists()) 7914 DevPointersMap[L.first].push_back(L.second); 7915 } 7916 7917 /// Constructor for the declare mapper directive. 7918 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7919 : CurDir(&Dir), CGF(CGF) {} 7920 7921 /// Generate code for the combined entry if we have a partially mapped struct 7922 /// and take care of the mapping flags of the arguments corresponding to 7923 /// individual struct members. 
7924 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7925 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7926 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7927 const StructRangeInfoTy &PartialStruct) const { 7928 // Base is the base of the struct 7929 BasePointers.push_back(PartialStruct.Base.getPointer()); 7930 // Pointer is the address of the lowest element 7931 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7932 Pointers.push_back(LB); 7933 // Size is (addr of {highest+1} element) - (addr of lowest element) 7934 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7935 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7936 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7937 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7938 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7939 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7940 /*isSigned=*/false); 7941 Sizes.push_back(Size); 7942 // Map type is always TARGET_PARAM 7943 Types.push_back(OMP_MAP_TARGET_PARAM); 7944 // Remove TARGET_PARAM flag from the first element 7945 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7946 7947 // All other current entries will be MEMBER_OF the combined entry 7948 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7949 // 0xFFFF in the MEMBER_OF field). 7950 OpenMPOffloadMappingFlags MemberOfFlag = 7951 getMemberOfFlag(BasePointers.size() - 1); 7952 for (auto &M : CurTypes) 7953 setCorrectMemberOfFlag(M, MemberOfFlag); 7954 } 7955 7956 /// Generate all the base pointers, section pointers, sizes and map 7957 /// types for the extracted mappable expressions. Also, for each item that 7958 /// relates with a device pointer, a pair of the relevant declaration and 7959 /// index where it occurs is appended to the device pointers info array. 
7960 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7961 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7962 MapFlagsArrayTy &Types) const { 7963 // We have to process the component lists that relate with the same 7964 // declaration in a single chunk so that we can generate the map flags 7965 // correctly. Therefore, we organize all lists in a map. 7966 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7967 7968 // Helper function to fill the information map for the different supported 7969 // clauses. 7970 auto &&InfoGen = [&Info]( 7971 const ValueDecl *D, 7972 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7973 OpenMPMapClauseKind MapType, 7974 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7975 bool ReturnDevicePointer, bool IsImplicit) { 7976 const ValueDecl *VD = 7977 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7978 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7979 IsImplicit); 7980 }; 7981 7982 assert(CurDir.is<const OMPExecutableDirective *>() && 7983 "Expect a executable directive"); 7984 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 7985 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 7986 for (const auto &L : C->component_lists()) { 7987 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7988 /*ReturnDevicePointer=*/false, C->isImplicit()); 7989 } 7990 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 7991 for (const auto &L : C->component_lists()) { 7992 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7993 /*ReturnDevicePointer=*/false, C->isImplicit()); 7994 } 7995 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 7996 for (const auto &L : C->component_lists()) { 7997 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7998 /*ReturnDevicePointer=*/false, C->isImplicit()); 7999 } 8000 8001 // Look at the use_device_ptr clause information and mark the existing map 8002 // 
entries as such. If there is no map information for an entry in the 8003 // use_device_ptr list, we create one with map type 'alloc' and zero size 8004 // section. It is the user fault if that was not mapped before. If there is 8005 // no map information and the pointer is a struct member, then we defer the 8006 // emission of that entry until the whole struct has been processed. 8007 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 8008 DeferredInfo; 8009 8010 for (const auto *C : 8011 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 8012 for (const auto &L : C->component_lists()) { 8013 assert(!L.second.empty() && "Not expecting empty list of components!"); 8014 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 8015 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8016 const Expr *IE = L.second.back().getAssociatedExpression(); 8017 // If the first component is a member expression, we have to look into 8018 // 'this', which maps to null in the map of map information. Otherwise 8019 // look directly for the information. 8020 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8021 8022 // We potentially have map information for this declaration already. 8023 // Look for the first set of components that refer to it. 8024 if (It != Info.end()) { 8025 auto CI = std::find_if( 8026 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 8027 return MI.Components.back().getAssociatedDeclaration() == VD; 8028 }); 8029 // If we found a map entry, signal that the pointer has to be returned 8030 // and move on to the next declaration. 8031 if (CI != It->second.end()) { 8032 CI->ReturnDevicePointer = true; 8033 continue; 8034 } 8035 } 8036 8037 // We didn't find any match in our map information - generate a zero 8038 // size array section - if the pointer is a struct member we defer this 8039 // action until the whole struct has been processed. 
8040 if (isa<MemberExpr>(IE)) { 8041 // Insert the pointer into Info to be processed by 8042 // generateInfoForComponentList. Because it is a member pointer 8043 // without a pointee, no entry will be generated for it, therefore 8044 // we need to generate one after the whole struct has been processed. 8045 // Nonetheless, generateInfoForComponentList must be called to take 8046 // the pointer into account for the calculation of the range of the 8047 // partial struct. 8048 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8049 /*ReturnDevicePointer=*/false, C->isImplicit()); 8050 DeferredInfo[nullptr].emplace_back(IE, VD); 8051 } else { 8052 llvm::Value *Ptr = 8053 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8054 BasePointers.emplace_back(Ptr, VD); 8055 Pointers.push_back(Ptr); 8056 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8057 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8058 } 8059 } 8060 } 8061 8062 for (const auto &M : Info) { 8063 // We need to know when we generate information for the first component 8064 // associated with a capture, because the mapping flags depend on it. 8065 bool IsFirstComponentList = true; 8066 8067 // Temporary versions of arrays 8068 MapBaseValuesArrayTy CurBasePointers; 8069 MapValuesArrayTy CurPointers; 8070 MapValuesArrayTy CurSizes; 8071 MapFlagsArrayTy CurTypes; 8072 StructRangeInfoTy PartialStruct; 8073 8074 for (const MapInfo &L : M.second) { 8075 assert(!L.Components.empty() && 8076 "Not expecting declaration with no component lists."); 8077 8078 // Remember the current base pointer index. 8079 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8080 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8081 CurBasePointers, CurPointers, CurSizes, 8082 CurTypes, PartialStruct, 8083 IsFirstComponentList, L.IsImplicit); 8084 8085 // If this entry relates with a device pointer, set the relevant 8086 // declaration and add the 'return pointer' flag. 
8087 if (L.ReturnDevicePointer) { 8088 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8089 "Unexpected number of mapped base pointers."); 8090 8091 const ValueDecl *RelevantVD = 8092 L.Components.back().getAssociatedDeclaration(); 8093 assert(RelevantVD && 8094 "No relevant declaration related with device pointer??"); 8095 8096 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8097 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8098 } 8099 IsFirstComponentList = false; 8100 } 8101 8102 // Append any pending zero-length pointers which are struct members and 8103 // used with use_device_ptr. 8104 auto CI = DeferredInfo.find(M.first); 8105 if (CI != DeferredInfo.end()) { 8106 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8107 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); 8108 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8109 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8110 CurBasePointers.emplace_back(BasePtr, L.VD); 8111 CurPointers.push_back(Ptr); 8112 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8113 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8114 // value MEMBER_OF=FFFF so that the entry is later updated with the 8115 // correct value of MEMBER_OF. 8116 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8117 OMP_MAP_MEMBER_OF); 8118 } 8119 } 8120 8121 // If there is an entry in PartialStruct it means we have a struct with 8122 // individual members mapped. Emit an extra combined entry. 8123 if (PartialStruct.Base.isValid()) 8124 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8125 PartialStruct); 8126 8127 // We need to append the results of this capture to what we already have. 
8128 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8129 Pointers.append(CurPointers.begin(), CurPointers.end()); 8130 Sizes.append(CurSizes.begin(), CurSizes.end()); 8131 Types.append(CurTypes.begin(), CurTypes.end()); 8132 } 8133 } 8134 8135 /// Generate all the base pointers, section pointers, sizes and map types for 8136 /// the extracted map clauses of user-defined mapper. 8137 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8138 MapValuesArrayTy &Pointers, 8139 MapValuesArrayTy &Sizes, 8140 MapFlagsArrayTy &Types) const { 8141 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8142 "Expect a declare mapper directive"); 8143 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8144 // We have to process the component lists that relate with the same 8145 // declaration in a single chunk so that we can generate the map flags 8146 // correctly. Therefore, we organize all lists in a map. 8147 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8148 8149 // Helper function to fill the information map for the different supported 8150 // clauses. 8151 auto &&InfoGen = [&Info]( 8152 const ValueDecl *D, 8153 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8154 OpenMPMapClauseKind MapType, 8155 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8156 bool ReturnDevicePointer, bool IsImplicit) { 8157 const ValueDecl *VD = 8158 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8159 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8160 IsImplicit); 8161 }; 8162 8163 for (const auto *C : CurMapperDir->clauselists()) { 8164 const auto *MC = cast<OMPMapClause>(C); 8165 for (const auto &L : MC->component_lists()) { 8166 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8167 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8168 } 8169 } 8170 8171 for (const auto &M : Info) { 8172 // We need to know when we generate information for the first component 8173 // associated with a capture, because the mapping flags depend on it. 8174 bool IsFirstComponentList = true; 8175 8176 // Temporary versions of arrays 8177 MapBaseValuesArrayTy CurBasePointers; 8178 MapValuesArrayTy CurPointers; 8179 MapValuesArrayTy CurSizes; 8180 MapFlagsArrayTy CurTypes; 8181 StructRangeInfoTy PartialStruct; 8182 8183 for (const MapInfo &L : M.second) { 8184 assert(!L.Components.empty() && 8185 "Not expecting declaration with no component lists."); 8186 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8187 CurBasePointers, CurPointers, CurSizes, 8188 CurTypes, PartialStruct, 8189 IsFirstComponentList, L.IsImplicit); 8190 IsFirstComponentList = false; 8191 } 8192 8193 // If there is an entry in PartialStruct it means we have a struct with 8194 // individual members mapped. Emit an extra combined entry. 8195 if (PartialStruct.Base.isValid()) 8196 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8197 PartialStruct); 8198 8199 // We need to append the results of this capture to what we already have. 8200 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8201 Pointers.append(CurPointers.begin(), CurPointers.end()); 8202 Sizes.append(CurSizes.begin(), CurSizes.end()); 8203 Types.append(CurTypes.begin(), CurTypes.end()); 8204 } 8205 } 8206 8207 /// Emit capture info for lambdas for variables captured by reference. 
8208 void generateInfoForLambdaCaptures( 8209 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8210 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8211 MapFlagsArrayTy &Types, 8212 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8213 const auto *RD = VD->getType() 8214 .getCanonicalType() 8215 .getNonReferenceType() 8216 ->getAsCXXRecordDecl(); 8217 if (!RD || !RD->isLambda()) 8218 return; 8219 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8220 LValue VDLVal = CGF.MakeAddrLValue( 8221 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8222 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8223 FieldDecl *ThisCapture = nullptr; 8224 RD->getCaptureFields(Captures, ThisCapture); 8225 if (ThisCapture) { 8226 LValue ThisLVal = 8227 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8228 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8229 LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); 8230 BasePointers.push_back(ThisLVal.getPointer()); 8231 Pointers.push_back(ThisLValVal.getPointer()); 8232 Sizes.push_back( 8233 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8234 CGF.Int64Ty, /*isSigned=*/true)); 8235 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8236 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8237 } 8238 for (const LambdaCapture &LC : RD->captures()) { 8239 if (!LC.capturesVariable()) 8240 continue; 8241 const VarDecl *VD = LC.getCapturedVar(); 8242 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8243 continue; 8244 auto It = Captures.find(VD); 8245 assert(It != Captures.end() && "Found lambda capture without field."); 8246 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8247 if (LC.getCaptureKind() == LCK_ByRef) { 8248 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8249 LambdaPointers.try_emplace(VarLVal.getPointer(), 
VDLVal.getPointer()); 8250 BasePointers.push_back(VarLVal.getPointer()); 8251 Pointers.push_back(VarLValVal.getPointer()); 8252 Sizes.push_back(CGF.Builder.CreateIntCast( 8253 CGF.getTypeSize( 8254 VD->getType().getCanonicalType().getNonReferenceType()), 8255 CGF.Int64Ty, /*isSigned=*/true)); 8256 } else { 8257 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8258 LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); 8259 BasePointers.push_back(VarLVal.getPointer()); 8260 Pointers.push_back(VarRVal.getScalarVal()); 8261 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8262 } 8263 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8264 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8265 } 8266 } 8267 8268 /// Set correct indices for lambdas captures. 8269 void adjustMemberOfForLambdaCaptures( 8270 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8271 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8272 MapFlagsArrayTy &Types) const { 8273 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8274 // Set correct member_of idx for all implicit lambda captures. 8275 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8276 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8277 continue; 8278 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8279 assert(BasePtr && "Unable to find base lambda address."); 8280 int TgtIdx = -1; 8281 for (unsigned J = I; J > 0; --J) { 8282 unsigned Idx = J - 1; 8283 if (Pointers[Idx] != BasePtr) 8284 continue; 8285 TgtIdx = Idx; 8286 break; 8287 } 8288 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8289 // All other current entries will be MEMBER_OF the combined entry 8290 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8291 // 0xFFFF in the MEMBER_OF field). 
8292 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8293 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8294 } 8295 } 8296 8297 /// Generate the base pointers, section pointers, sizes and map types 8298 /// associated to a given capture. 8299 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8300 llvm::Value *Arg, 8301 MapBaseValuesArrayTy &BasePointers, 8302 MapValuesArrayTy &Pointers, 8303 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8304 StructRangeInfoTy &PartialStruct) const { 8305 assert(!Cap->capturesVariableArrayType() && 8306 "Not expecting to generate map info for a variable array type!"); 8307 8308 // We need to know when we generating information for the first component 8309 const ValueDecl *VD = Cap->capturesThis() 8310 ? nullptr 8311 : Cap->getCapturedVar()->getCanonicalDecl(); 8312 8313 // If this declaration appears in a is_device_ptr clause we just have to 8314 // pass the pointer by value. If it is a reference to a declaration, we just 8315 // pass its value. 
8316 if (DevPointersMap.count(VD)) { 8317 BasePointers.emplace_back(Arg, VD); 8318 Pointers.push_back(Arg); 8319 Sizes.push_back( 8320 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8321 CGF.Int64Ty, /*isSigned=*/true)); 8322 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8323 return; 8324 } 8325 8326 using MapData = 8327 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8328 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8329 SmallVector<MapData, 4> DeclComponentLists; 8330 assert(CurDir.is<const OMPExecutableDirective *>() && 8331 "Expect a executable directive"); 8332 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8333 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8334 for (const auto &L : C->decl_component_lists(VD)) { 8335 assert(L.first == VD && 8336 "We got information for the wrong declaration??"); 8337 assert(!L.second.empty() && 8338 "Not expecting declaration with no component lists."); 8339 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8340 C->getMapTypeModifiers(), 8341 C->isImplicit()); 8342 } 8343 } 8344 8345 // Find overlapping elements (including the offset from the base element). 
8346 llvm::SmallDenseMap< 8347 const MapData *, 8348 llvm::SmallVector< 8349 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8350 4> 8351 OverlappedData; 8352 size_t Count = 0; 8353 for (const MapData &L : DeclComponentLists) { 8354 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8355 OpenMPMapClauseKind MapType; 8356 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8357 bool IsImplicit; 8358 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8359 ++Count; 8360 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8361 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8362 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8363 auto CI = Components.rbegin(); 8364 auto CE = Components.rend(); 8365 auto SI = Components1.rbegin(); 8366 auto SE = Components1.rend(); 8367 for (; CI != CE && SI != SE; ++CI, ++SI) { 8368 if (CI->getAssociatedExpression()->getStmtClass() != 8369 SI->getAssociatedExpression()->getStmtClass()) 8370 break; 8371 // Are we dealing with different variables/fields? 8372 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8373 break; 8374 } 8375 // Found overlapping if, at least for one component, reached the head of 8376 // the components list. 8377 if (CI == CE || SI == SE) { 8378 assert((CI != CE || SI != SE) && 8379 "Unexpected full match of the mapping components."); 8380 const MapData &BaseData = CI == CE ? L : L1; 8381 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8382 SI == SE ? Components : Components1; 8383 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8384 OverlappedElements.getSecond().push_back(SubData); 8385 } 8386 } 8387 } 8388 // Sort the overlapped elements for each item. 
8389 llvm::SmallVector<const FieldDecl *, 4> Layout; 8390 if (!OverlappedData.empty()) { 8391 if (const auto *CRD = 8392 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8393 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8394 else { 8395 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8396 Layout.append(RD->field_begin(), RD->field_end()); 8397 } 8398 } 8399 for (auto &Pair : OverlappedData) { 8400 llvm::sort( 8401 Pair.getSecond(), 8402 [&Layout]( 8403 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8404 OMPClauseMappableExprCommon::MappableExprComponentListRef 8405 Second) { 8406 auto CI = First.rbegin(); 8407 auto CE = First.rend(); 8408 auto SI = Second.rbegin(); 8409 auto SE = Second.rend(); 8410 for (; CI != CE && SI != SE; ++CI, ++SI) { 8411 if (CI->getAssociatedExpression()->getStmtClass() != 8412 SI->getAssociatedExpression()->getStmtClass()) 8413 break; 8414 // Are we dealing with different variables/fields? 8415 if (CI->getAssociatedDeclaration() != 8416 SI->getAssociatedDeclaration()) 8417 break; 8418 } 8419 8420 // Lists contain the same elements. 8421 if (CI == CE && SI == SE) 8422 return false; 8423 8424 // List with less elements is less than list with more elements. 8425 if (CI == CE || SI == SE) 8426 return CI == CE; 8427 8428 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8429 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8430 if (FD1->getParent() == FD2->getParent()) 8431 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8432 const auto It = 8433 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8434 return FD == FD1 || FD == FD2; 8435 }); 8436 return *It == FD1; 8437 }); 8438 } 8439 8440 // Associated with a capture, because the mapping flags depend on it. 8441 // Go through all of the elements with the overlapped elements. 
8442 for (const auto &Pair : OverlappedData) { 8443 const MapData &L = *Pair.getFirst(); 8444 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8445 OpenMPMapClauseKind MapType; 8446 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8447 bool IsImplicit; 8448 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8449 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8450 OverlappedComponents = Pair.getSecond(); 8451 bool IsFirstComponentList = true; 8452 generateInfoForComponentList(MapType, MapModifiers, Components, 8453 BasePointers, Pointers, Sizes, Types, 8454 PartialStruct, IsFirstComponentList, 8455 IsImplicit, OverlappedComponents); 8456 } 8457 // Go through other elements without overlapped elements. 8458 bool IsFirstComponentList = OverlappedData.empty(); 8459 for (const MapData &L : DeclComponentLists) { 8460 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8461 OpenMPMapClauseKind MapType; 8462 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8463 bool IsImplicit; 8464 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8465 auto It = OverlappedData.find(&L); 8466 if (It == OverlappedData.end()) 8467 generateInfoForComponentList(MapType, MapModifiers, Components, 8468 BasePointers, Pointers, Sizes, Types, 8469 PartialStruct, IsFirstComponentList, 8470 IsImplicit); 8471 IsFirstComponentList = false; 8472 } 8473 } 8474 8475 /// Generate the base pointers, section pointers, sizes and map types 8476 /// associated with the declare target link variables. 
8477 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8478 MapValuesArrayTy &Pointers, 8479 MapValuesArrayTy &Sizes, 8480 MapFlagsArrayTy &Types) const { 8481 assert(CurDir.is<const OMPExecutableDirective *>() && 8482 "Expect a executable directive"); 8483 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8484 // Map other list items in the map clause which are not captured variables 8485 // but "declare target link" global variables. 8486 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8487 for (const auto &L : C->component_lists()) { 8488 if (!L.first) 8489 continue; 8490 const auto *VD = dyn_cast<VarDecl>(L.first); 8491 if (!VD) 8492 continue; 8493 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8494 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8495 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8496 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8497 continue; 8498 StructRangeInfoTy PartialStruct; 8499 generateInfoForComponentList( 8500 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8501 Pointers, Sizes, Types, PartialStruct, 8502 /*IsFirstComponentList=*/true, C->isImplicit()); 8503 assert(!PartialStruct.Base.isValid() && 8504 "No partial structs for declare target link expected."); 8505 } 8506 } 8507 } 8508 8509 /// Generate the default map information for a given capture \a CI, 8510 /// record field declaration \a RI and captured value \a CV. 8511 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8512 const FieldDecl &RI, llvm::Value *CV, 8513 MapBaseValuesArrayTy &CurBasePointers, 8514 MapValuesArrayTy &CurPointers, 8515 MapValuesArrayTy &CurSizes, 8516 MapFlagsArrayTy &CurMapTypes) const { 8517 bool IsImplicit = true; 8518 // Do the default mapping. 
8519 if (CI.capturesThis()) { 8520 CurBasePointers.push_back(CV); 8521 CurPointers.push_back(CV); 8522 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8523 CurSizes.push_back( 8524 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8525 CGF.Int64Ty, /*isSigned=*/true)); 8526 // Default map type. 8527 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8528 } else if (CI.capturesVariableByCopy()) { 8529 CurBasePointers.push_back(CV); 8530 CurPointers.push_back(CV); 8531 if (!RI.getType()->isAnyPointerType()) { 8532 // We have to signal to the runtime captures passed by value that are 8533 // not pointers. 8534 CurMapTypes.push_back(OMP_MAP_LITERAL); 8535 CurSizes.push_back(CGF.Builder.CreateIntCast( 8536 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8537 } else { 8538 // Pointers are implicitly mapped with a zero size and no flags 8539 // (other than first map that is added for all implicit maps). 8540 CurMapTypes.push_back(OMP_MAP_NONE); 8541 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8542 } 8543 const VarDecl *VD = CI.getCapturedVar(); 8544 auto I = FirstPrivateDecls.find(VD); 8545 if (I != FirstPrivateDecls.end()) 8546 IsImplicit = I->getSecond(); 8547 } else { 8548 assert(CI.capturesVariable() && "Expected captured reference."); 8549 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8550 QualType ElementType = PtrTy->getPointeeType(); 8551 CurSizes.push_back(CGF.Builder.CreateIntCast( 8552 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8553 // The default map type for a scalar/complex type is 'to' because by 8554 // default the value doesn't have to be retrieved. For an aggregate 8555 // type, the default is 'tofrom'. 
8556 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8557 const VarDecl *VD = CI.getCapturedVar(); 8558 auto I = FirstPrivateDecls.find(VD); 8559 if (I != FirstPrivateDecls.end() && 8560 VD->getType().isConstant(CGF.getContext())) { 8561 llvm::Constant *Addr = 8562 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8563 // Copy the value of the original variable to the new global copy. 8564 CGF.Builder.CreateMemCpy( 8565 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), 8566 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8567 CurSizes.back(), /*IsVolatile=*/false); 8568 // Use new global variable as the base pointers. 8569 CurBasePointers.push_back(Addr); 8570 CurPointers.push_back(Addr); 8571 } else { 8572 CurBasePointers.push_back(CV); 8573 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8574 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8575 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8576 AlignmentSource::Decl)); 8577 CurPointers.push_back(PtrAddr.getPointer()); 8578 } else { 8579 CurPointers.push_back(CV); 8580 } 8581 } 8582 if (I != FirstPrivateDecls.end()) 8583 IsImplicit = I->getSecond(); 8584 } 8585 // Every default map produces a single argument which is a target parameter. 8586 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8587 8588 // Add flag stating this is an implicit map. 8589 if (IsImplicit) 8590 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8591 } 8592 }; 8593 } // anonymous namespace 8594 8595 /// Emit the arrays used to pass the captures and map information to the 8596 /// offloading runtime library. If there is no map or capture information, 8597 /// return nullptr by reference. 
8598 static void 8599 emitOffloadingArrays(CodeGenFunction &CGF, 8600 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8601 MappableExprsHandler::MapValuesArrayTy &Pointers, 8602 MappableExprsHandler::MapValuesArrayTy &Sizes, 8603 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8604 CGOpenMPRuntime::TargetDataInfo &Info) { 8605 CodeGenModule &CGM = CGF.CGM; 8606 ASTContext &Ctx = CGF.getContext(); 8607 8608 // Reset the array information. 8609 Info.clearArrayInfo(); 8610 Info.NumberOfPtrs = BasePointers.size(); 8611 8612 if (Info.NumberOfPtrs) { 8613 // Detect if we have any capture size requiring runtime evaluation of the 8614 // size so that a constant array could be eventually used. 8615 bool hasRuntimeEvaluationCaptureSize = false; 8616 for (llvm::Value *S : Sizes) 8617 if (!isa<llvm::Constant>(S)) { 8618 hasRuntimeEvaluationCaptureSize = true; 8619 break; 8620 } 8621 8622 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8623 QualType PointerArrayType = 8624 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 8625 /*IndexTypeQuals=*/0); 8626 8627 Info.BasePointersArray = 8628 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8629 Info.PointersArray = 8630 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8631 8632 // If we don't have any VLA types or other types that require runtime 8633 // evaluation, we can use a constant array for the map sizes, otherwise we 8634 // need to fill up the arrays as we do for the pointers. 
8635 QualType Int64Ty = 8636 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8637 if (hasRuntimeEvaluationCaptureSize) { 8638 QualType SizeArrayType = 8639 Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal, 8640 /*IndexTypeQuals=*/0); 8641 Info.SizesArray = 8642 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8643 } else { 8644 // We expect all the sizes to be constant, so we collect them to create 8645 // a constant array. 8646 SmallVector<llvm::Constant *, 16> ConstSizes; 8647 for (llvm::Value *S : Sizes) 8648 ConstSizes.push_back(cast<llvm::Constant>(S)); 8649 8650 auto *SizesArrayInit = llvm::ConstantArray::get( 8651 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8652 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8653 auto *SizesArrayGbl = new llvm::GlobalVariable( 8654 CGM.getModule(), SizesArrayInit->getType(), 8655 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8656 SizesArrayInit, Name); 8657 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8658 Info.SizesArray = SizesArrayGbl; 8659 } 8660 8661 // The map types are always constant so we don't need to generate code to 8662 // fill arrays. Instead, we create an array constant. 
8663 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8664 llvm::copy(MapTypes, Mapping.begin()); 8665 llvm::Constant *MapTypesArrayInit = 8666 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8667 std::string MaptypesName = 8668 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8669 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8670 CGM.getModule(), MapTypesArrayInit->getType(), 8671 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8672 MapTypesArrayInit, MaptypesName); 8673 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8674 Info.MapTypesArray = MapTypesArrayGbl; 8675 8676 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8677 llvm::Value *BPVal = *BasePointers[I]; 8678 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8679 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8680 Info.BasePointersArray, 0, I); 8681 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8682 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8683 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8684 CGF.Builder.CreateStore(BPVal, BPAddr); 8685 8686 if (Info.requiresDevicePointerInfo()) 8687 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8688 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8689 8690 llvm::Value *PVal = Pointers[I]; 8691 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8692 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8693 Info.PointersArray, 0, I); 8694 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8695 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8696 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8697 CGF.Builder.CreateStore(PVal, PAddr); 8698 8699 if (hasRuntimeEvaluationCaptureSize) { 8700 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8701 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8702 Info.SizesArray, 8703 /*Idx0=*/0, 8704 /*Idx1=*/I); 8705 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8706 CGF.Builder.CreateStore( 8707 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8708 SAddr); 8709 } 8710 } 8711 } 8712 } 8713 8714 /// Emit the arguments to be passed to the runtime library based on the 8715 /// arrays of pointers, sizes and map types. 8716 static void emitOffloadingArraysArgument( 8717 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8718 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8719 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8720 CodeGenModule &CGM = CGF.CGM; 8721 if (Info.NumberOfPtrs) { 8722 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8723 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8724 Info.BasePointersArray, 8725 /*Idx0=*/0, /*Idx1=*/0); 8726 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8727 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8728 Info.PointersArray, 8729 /*Idx0=*/0, 8730 /*Idx1=*/0); 8731 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8732 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8733 /*Idx0=*/0, /*Idx1=*/0); 8734 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8735 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8736 Info.MapTypesArray, 8737 /*Idx0=*/0, 8738 /*Idx1=*/0); 8739 } else { 8740 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8741 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8742 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8743 MapTypesArrayArg = 8744 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8745 } 8746 } 8747 8748 /// Check for inner distribute directive. 
8749 static const OMPExecutableDirective * 8750 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8751 const auto *CS = D.getInnermostCapturedStmt(); 8752 const auto *Body = 8753 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8754 const Stmt *ChildStmt = 8755 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8756 8757 if (const auto *NestedDir = 8758 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8759 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8760 switch (D.getDirectiveKind()) { 8761 case OMPD_target: 8762 if (isOpenMPDistributeDirective(DKind)) 8763 return NestedDir; 8764 if (DKind == OMPD_teams) { 8765 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8766 /*IgnoreCaptured=*/true); 8767 if (!Body) 8768 return nullptr; 8769 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8770 if (const auto *NND = 8771 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8772 DKind = NND->getDirectiveKind(); 8773 if (isOpenMPDistributeDirective(DKind)) 8774 return NND; 8775 } 8776 } 8777 return nullptr; 8778 case OMPD_target_teams: 8779 if (isOpenMPDistributeDirective(DKind)) 8780 return NestedDir; 8781 return nullptr; 8782 case OMPD_target_parallel: 8783 case OMPD_target_simd: 8784 case OMPD_target_parallel_for: 8785 case OMPD_target_parallel_for_simd: 8786 return nullptr; 8787 case OMPD_target_teams_distribute: 8788 case OMPD_target_teams_distribute_simd: 8789 case OMPD_target_teams_distribute_parallel_for: 8790 case OMPD_target_teams_distribute_parallel_for_simd: 8791 case OMPD_parallel: 8792 case OMPD_for: 8793 case OMPD_parallel_for: 8794 case OMPD_parallel_sections: 8795 case OMPD_for_simd: 8796 case OMPD_parallel_for_simd: 8797 case OMPD_cancel: 8798 case OMPD_cancellation_point: 8799 case OMPD_ordered: 8800 case OMPD_threadprivate: 8801 case OMPD_allocate: 8802 case OMPD_task: 8803 case OMPD_simd: 8804 case OMPD_sections: 8805 case OMPD_section: 8806 case 
OMPD_single: 8807 case OMPD_master: 8808 case OMPD_critical: 8809 case OMPD_taskyield: 8810 case OMPD_barrier: 8811 case OMPD_taskwait: 8812 case OMPD_taskgroup: 8813 case OMPD_atomic: 8814 case OMPD_flush: 8815 case OMPD_teams: 8816 case OMPD_target_data: 8817 case OMPD_target_exit_data: 8818 case OMPD_target_enter_data: 8819 case OMPD_distribute: 8820 case OMPD_distribute_simd: 8821 case OMPD_distribute_parallel_for: 8822 case OMPD_distribute_parallel_for_simd: 8823 case OMPD_teams_distribute: 8824 case OMPD_teams_distribute_simd: 8825 case OMPD_teams_distribute_parallel_for: 8826 case OMPD_teams_distribute_parallel_for_simd: 8827 case OMPD_target_update: 8828 case OMPD_declare_simd: 8829 case OMPD_declare_target: 8830 case OMPD_end_declare_target: 8831 case OMPD_declare_reduction: 8832 case OMPD_declare_mapper: 8833 case OMPD_taskloop: 8834 case OMPD_taskloop_simd: 8835 case OMPD_requires: 8836 case OMPD_unknown: 8837 llvm_unreachable("Unexpected directive."); 8838 } 8839 } 8840 8841 return nullptr; 8842 } 8843 8844 /// Emit the user-defined mapper function. The code generation follows the 8845 /// pattern in the example below. 8846 /// \code 8847 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8848 /// void *base, void *begin, 8849 /// int64_t size, int64_t type) { 8850 /// // Allocate space for an array section first. 8851 /// if (size > 1 && !maptype.IsDelete) 8852 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8853 /// size*sizeof(Ty), clearToFrom(type)); 8854 /// // Map members. 
8855 /// for (unsigned i = 0; i < size; i++) { 8856 /// // For each component specified by this mapper: 8857 /// for (auto c : all_components) { 8858 /// if (c.hasMapper()) 8859 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8860 /// c.arg_type); 8861 /// else 8862 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8863 /// c.arg_begin, c.arg_size, c.arg_type); 8864 /// } 8865 /// } 8866 /// // Delete the array section. 8867 /// if (size > 1 && maptype.IsDelete) 8868 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8869 /// size*sizeof(Ty), clearToFrom(type)); 8870 /// } 8871 /// \endcode 8872 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8873 CodeGenFunction *CGF) { 8874 if (UDMMap.count(D) > 0) 8875 return; 8876 ASTContext &C = CGM.getContext(); 8877 QualType Ty = D->getType(); 8878 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8879 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8880 auto *MapperVarDecl = 8881 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8882 SourceLocation Loc = D->getLocation(); 8883 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8884 8885 // Prepare mapper function arguments and attributes. 
8886 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8887 C.VoidPtrTy, ImplicitParamDecl::Other); 8888 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 8889 ImplicitParamDecl::Other); 8890 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8891 C.VoidPtrTy, ImplicitParamDecl::Other); 8892 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8893 ImplicitParamDecl::Other); 8894 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8895 ImplicitParamDecl::Other); 8896 FunctionArgList Args; 8897 Args.push_back(&HandleArg); 8898 Args.push_back(&BaseArg); 8899 Args.push_back(&BeginArg); 8900 Args.push_back(&SizeArg); 8901 Args.push_back(&TypeArg); 8902 const CGFunctionInfo &FnInfo = 8903 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 8904 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 8905 SmallString<64> TyStr; 8906 llvm::raw_svector_ostream Out(TyStr); 8907 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 8908 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 8909 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 8910 Name, &CGM.getModule()); 8911 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 8912 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 8913 // Start the mapper function code generation. 8914 CodeGenFunction MapperCGF(CGM); 8915 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 8916 // Compute the starting and end addreses of array elements. 
8917 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 8918 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 8919 C.getPointerType(Int64Ty), Loc); 8920 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 8921 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 8922 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 8923 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 8924 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 8925 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 8926 C.getPointerType(Int64Ty), Loc); 8927 // Prepare common arguments for array initiation and deletion. 8928 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 8929 MapperCGF.GetAddrOfLocalVar(&HandleArg), 8930 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8931 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 8932 MapperCGF.GetAddrOfLocalVar(&BaseArg), 8933 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8934 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 8935 MapperCGF.GetAddrOfLocalVar(&BeginArg), 8936 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8937 8938 // Emit array initiation if this is an array section and \p MapType indicates 8939 // that memory allocation is required. 8940 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 8941 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 8942 ElementSize, HeadBB, /*IsInit=*/true); 8943 8944 // Emit a for loop to iterate through SizeArg of elements and map all of them. 8945 8946 // Emit the loop header block. 8947 MapperCGF.EmitBlock(HeadBB); 8948 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 8949 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 8950 // Evaluate whether the initial condition is satisfied. 
8951 llvm::Value *IsEmpty = 8952 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 8953 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 8954 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 8955 8956 // Emit the loop body block. 8957 MapperCGF.EmitBlock(BodyBB); 8958 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 8959 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 8960 PtrPHI->addIncoming(PtrBegin, EntryBB); 8961 Address PtrCurrent = 8962 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 8963 .getAlignment() 8964 .alignmentOfArrayElement(ElementSize)); 8965 // Privatize the declared variable of mapper to be the current array element. 8966 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 8967 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 8968 return MapperCGF 8969 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 8970 .getAddress(); 8971 }); 8972 (void)Scope.Privatize(); 8973 8974 // Get map clause information. Fill up the arrays with all mapped variables. 8975 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8976 MappableExprsHandler::MapValuesArrayTy Pointers; 8977 MappableExprsHandler::MapValuesArrayTy Sizes; 8978 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8979 MappableExprsHandler MEHandler(*D, MapperCGF); 8980 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 8981 8982 // Call the runtime API __tgt_mapper_num_components to get the number of 8983 // pre-existing components. 8984 llvm::Value *OffloadingArgs[] = {Handle}; 8985 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 8986 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 8987 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 8988 PreviousSize, 8989 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 8990 8991 // Fill up the runtime mapper handle for all components. 
8992 for (unsigned I = 0; I < BasePointers.size(); ++I) { 8993 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 8994 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8995 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 8996 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8997 llvm::Value *CurSizeArg = Sizes[I]; 8998 8999 // Extract the MEMBER_OF field from the map type. 9000 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9001 MapperCGF.EmitBlock(MemberBB); 9002 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 9003 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9004 OriMapType, 9005 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9006 llvm::BasicBlock *MemberCombineBB = 9007 MapperCGF.createBasicBlock("omp.member.combine"); 9008 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9009 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9010 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9011 // Add the number of pre-existing components to the MEMBER_OF field if it 9012 // is valid. 9013 MapperCGF.EmitBlock(MemberCombineBB); 9014 llvm::Value *CombinedMember = 9015 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9016 // Do nothing if it is not a member of previous components. 9017 MapperCGF.EmitBlock(TypeBB); 9018 llvm::PHINode *MemberMapType = 9019 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9020 MemberMapType->addIncoming(OriMapType, MemberBB); 9021 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9022 9023 // Combine the map type inherited from user-defined mapper with that 9024 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9025 // bits of the \a MapType, which is the input argument of the mapper 9026 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9027 // bits of MemberMapType. 
9028 // [OpenMP 5.0], 1.2.6. map-type decay. 9029 // | alloc | to | from | tofrom | release | delete 9030 // ---------------------------------------------------------- 9031 // alloc | alloc | alloc | alloc | alloc | release | delete 9032 // to | alloc | to | alloc | to | release | delete 9033 // from | alloc | alloc | from | from | release | delete 9034 // tofrom | alloc | to | from | tofrom | release | delete 9035 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9036 MapType, 9037 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9038 MappableExprsHandler::OMP_MAP_FROM)); 9039 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9040 llvm::BasicBlock *AllocElseBB = 9041 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9042 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9043 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9044 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9045 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9046 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9047 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9048 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9049 MapperCGF.EmitBlock(AllocBB); 9050 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9051 MemberMapType, 9052 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9053 MappableExprsHandler::OMP_MAP_FROM))); 9054 MapperCGF.Builder.CreateBr(EndBB); 9055 MapperCGF.EmitBlock(AllocElseBB); 9056 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9057 LeftToFrom, 9058 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9059 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9060 // In case of to, clear OMP_MAP_FROM. 
9061 MapperCGF.EmitBlock(ToBB); 9062 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9063 MemberMapType, 9064 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9065 MapperCGF.Builder.CreateBr(EndBB); 9066 MapperCGF.EmitBlock(ToElseBB); 9067 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9068 LeftToFrom, 9069 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9070 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9071 // In case of from, clear OMP_MAP_TO. 9072 MapperCGF.EmitBlock(FromBB); 9073 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9074 MemberMapType, 9075 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9076 // In case of tofrom, do nothing. 9077 MapperCGF.EmitBlock(EndBB); 9078 llvm::PHINode *CurMapType = 9079 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9080 CurMapType->addIncoming(AllocMapType, AllocBB); 9081 CurMapType->addIncoming(ToMapType, ToBB); 9082 CurMapType->addIncoming(FromMapType, FromBB); 9083 CurMapType->addIncoming(MemberMapType, ToElseBB); 9084 9085 // TODO: call the corresponding mapper function if a user-defined mapper is 9086 // associated with this map clause. 9087 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9088 // data structure. 9089 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9090 CurSizeArg, CurMapType}; 9091 MapperCGF.EmitRuntimeCall( 9092 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9093 OffloadingArgs); 9094 } 9095 9096 // Update the pointer to point to the next element that needs to be mapped, 9097 // and check whether we have mapped all elements. 
9098 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9099 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9100 PtrPHI->addIncoming(PtrNext, BodyBB); 9101 llvm::Value *IsDone = 9102 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9103 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9104 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9105 9106 MapperCGF.EmitBlock(ExitBB); 9107 // Emit array deletion if this is an array section and \p MapType indicates 9108 // that deletion is required. 9109 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9110 ElementSize, DoneBB, /*IsInit=*/false); 9111 9112 // Emit the function exit block. 9113 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9114 MapperCGF.FinishFunction(); 9115 UDMMap.try_emplace(D, Fn); 9116 if (CGF) { 9117 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9118 Decls.second.push_back(D); 9119 } 9120 } 9121 9122 /// Emit the array initialization or deletion portion for user-defined mapper 9123 /// code generation. First, it evaluates whether an array section is mapped and 9124 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9125 /// true, and \a MapType indicates to not delete this array, array 9126 /// initialization code is generated. If \a IsInit is false, and \a MapType 9127 /// indicates to not this array, array deletion code is generated. 9128 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9129 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9130 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9131 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9132 StringRef Prefix = IsInit ? ".init" : ".del"; 9133 9134 // Evaluate if this is an array section. 
9135 llvm::BasicBlock *IsDeleteBB = 9136 MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); 9137 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); 9138 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9139 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9140 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9141 9142 // Evaluate if we are going to delete this section. 9143 MapperCGF.EmitBlock(IsDeleteBB); 9144 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9145 MapType, 9146 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9147 llvm::Value *DeleteCond; 9148 if (IsInit) { 9149 DeleteCond = MapperCGF.Builder.CreateIsNull( 9150 DeleteBit, "omp.array" + Prefix + ".delete"); 9151 } else { 9152 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9153 DeleteBit, "omp.array" + Prefix + ".delete"); 9154 } 9155 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9156 9157 MapperCGF.EmitBlock(BodyBB); 9158 // Get the array size by multiplying element size and element number (i.e., \p 9159 // Size). 9160 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9161 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9162 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9163 // memory allocation/deletion purpose only. 9164 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9165 MapType, 9166 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9167 MappableExprsHandler::OMP_MAP_FROM))); 9168 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9169 // data structure. 
9170 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9171 MapperCGF.EmitRuntimeCall( 9172 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9173 } 9174 9175 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9176 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, 9177 const llvm::function_ref<llvm::Value *( 9178 CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { 9179 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9180 const OMPExecutableDirective *TD = &D; 9181 // Get nested teams distribute kind directive, if any. 9182 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9183 TD = getNestedDistributeDirective(CGM.getContext(), D); 9184 if (!TD) 9185 return; 9186 const auto *LD = cast<OMPLoopDirective>(TD); 9187 auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, 9188 PrePostActionTy &) { 9189 llvm::Value *NumIterations = SizeEmitter(CGF, *LD); 9190 9191 // Emit device ID if any. 
9192 llvm::Value *DeviceID; 9193 if (Device) 9194 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9195 CGF.Int64Ty, /*isSigned=*/true); 9196 else 9197 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9198 9199 llvm::Value *Args[] = {DeviceID, NumIterations}; 9200 CGF.EmitRuntimeCall( 9201 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9202 }; 9203 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9204 } 9205 9206 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 9207 const OMPExecutableDirective &D, 9208 llvm::Function *OutlinedFn, 9209 llvm::Value *OutlinedFnID, 9210 const Expr *IfCond, const Expr *Device) { 9211 if (!CGF.HaveInsertPoint()) 9212 return; 9213 9214 assert(OutlinedFn && "Invalid outlined function!"); 9215 9216 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9217 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9218 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9219 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9220 PrePostActionTy &) { 9221 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9222 }; 9223 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9224 9225 CodeGenFunction::OMPTargetDataInfo InputInfo; 9226 llvm::Value *MapTypesArray = nullptr; 9227 // Fill up the pointer arrays and transfer execution to the device. 9228 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9229 &MapTypesArray, &CS, RequiresOuterTask, 9230 &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { 9231 // On top of the arrays that were filled up, the target offloading call 9232 // takes as arguments the device id as well as the host pointer. The host 9233 // pointer is used by the runtime library to identify the current target 9234 // region, so it only has to be unique and not necessarily point to 9235 // anything. 
It could be the pointer to the outlined function that 9236 // implements the target region, but we aren't using that so that the 9237 // compiler doesn't need to keep that, and could therefore inline the host 9238 // function if proven worthwhile during optimization. 9239 9240 // From this point on, we need to have an ID of the target region defined. 9241 assert(OutlinedFnID && "Invalid outlined function ID!"); 9242 9243 // Emit device ID if any. 9244 llvm::Value *DeviceID; 9245 if (Device) { 9246 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9247 CGF.Int64Ty, /*isSigned=*/true); 9248 } else { 9249 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9250 } 9251 9252 // Emit the number of elements in the offloading arrays. 9253 llvm::Value *PointerNum = 9254 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9255 9256 // Return value of the runtime offloading call. 9257 llvm::Value *Return; 9258 9259 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9260 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9261 9262 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9263 // The target region is an outlined function launched by the runtime 9264 // via calls __tgt_target() or __tgt_target_teams(). 9265 // 9266 // __tgt_target() launches a target region with one team and one thread, 9267 // executing a serial region. This master thread may in turn launch 9268 // more threads within its team upon encountering a parallel region, 9269 // however, no additional teams can be launched on the device. 9270 // 9271 // __tgt_target_teams() launches a target region with one or more teams, 9272 // each with one or more threads. This call is required for target 9273 // constructs such as: 9274 // 'target teams' 9275 // 'target' / 'teams' 9276 // 'target teams distribute parallel for' 9277 // 'target parallel' 9278 // and so on. 
9279 // 9280 // Note that on the host and CPU targets, the runtime implementation of 9281 // these calls simply call the outlined function without forking threads. 9282 // The outlined functions themselves have runtime calls to 9283 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9284 // the compiler in emitTeamsCall() and emitParallelCall(). 9285 // 9286 // In contrast, on the NVPTX target, the implementation of 9287 // __tgt_target_teams() launches a GPU kernel with the requested number 9288 // of teams and threads so no additional calls to the runtime are required. 9289 if (NumTeams) { 9290 // If we have NumTeams defined this means that we have an enclosed teams 9291 // region. Therefore we also expect to have NumThreads defined. These two 9292 // values should be defined in the presence of a teams directive, 9293 // regardless of having any clauses associated. If the user is using teams 9294 // but no clauses, these two values will be the default that should be 9295 // passed to the runtime library - a 32-bit integer with the value zero. 9296 assert(NumThreads && "Thread limit expression should be available along " 9297 "with number of teams."); 9298 llvm::Value *OffloadingArgs[] = {DeviceID, 9299 OutlinedFnID, 9300 PointerNum, 9301 InputInfo.BasePointersArray.getPointer(), 9302 InputInfo.PointersArray.getPointer(), 9303 InputInfo.SizesArray.getPointer(), 9304 MapTypesArray, 9305 NumTeams, 9306 NumThreads}; 9307 Return = CGF.EmitRuntimeCall( 9308 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 9309 : OMPRTL__tgt_target_teams), 9310 OffloadingArgs); 9311 } else { 9312 llvm::Value *OffloadingArgs[] = {DeviceID, 9313 OutlinedFnID, 9314 PointerNum, 9315 InputInfo.BasePointersArray.getPointer(), 9316 InputInfo.PointersArray.getPointer(), 9317 InputInfo.SizesArray.getPointer(), 9318 MapTypesArray}; 9319 Return = CGF.EmitRuntimeCall( 9320 createRuntimeFunction(HasNowait ? 
OMPRTL__tgt_target_nowait 9321 : OMPRTL__tgt_target), 9322 OffloadingArgs); 9323 } 9324 9325 // Check the error code and execute the host version if required. 9326 llvm::BasicBlock *OffloadFailedBlock = 9327 CGF.createBasicBlock("omp_offload.failed"); 9328 llvm::BasicBlock *OffloadContBlock = 9329 CGF.createBasicBlock("omp_offload.cont"); 9330 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9331 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9332 9333 CGF.EmitBlock(OffloadFailedBlock); 9334 if (RequiresOuterTask) { 9335 CapturedVars.clear(); 9336 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9337 } 9338 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9339 CGF.EmitBranch(OffloadContBlock); 9340 9341 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9342 }; 9343 9344 // Notify that the host version must be executed. 9345 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9346 RequiresOuterTask](CodeGenFunction &CGF, 9347 PrePostActionTy &) { 9348 if (RequiresOuterTask) { 9349 CapturedVars.clear(); 9350 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9351 } 9352 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9353 }; 9354 9355 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9356 &CapturedVars, RequiresOuterTask, 9357 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9358 // Fill up the arrays with all the captured variables. 9359 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9360 MappableExprsHandler::MapValuesArrayTy Pointers; 9361 MappableExprsHandler::MapValuesArrayTy Sizes; 9362 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9363 9364 // Get mappable expression information. 
9365 MappableExprsHandler MEHandler(D, CGF); 9366 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9367 9368 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9369 auto CV = CapturedVars.begin(); 9370 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9371 CE = CS.capture_end(); 9372 CI != CE; ++CI, ++RI, ++CV) { 9373 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 9374 MappableExprsHandler::MapValuesArrayTy CurPointers; 9375 MappableExprsHandler::MapValuesArrayTy CurSizes; 9376 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 9377 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9378 9379 // VLA sizes are passed to the outlined region by copy and do not have map 9380 // information associated. 9381 if (CI->capturesVariableArrayType()) { 9382 CurBasePointers.push_back(*CV); 9383 CurPointers.push_back(*CV); 9384 CurSizes.push_back(CGF.Builder.CreateIntCast( 9385 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9386 // Copy to the device as an argument. No need to retrieve it. 9387 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9388 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9389 MappableExprsHandler::OMP_MAP_IMPLICIT); 9390 } else { 9391 // If we have any information in the map clause, we use it, otherwise we 9392 // just do a default mapping. 9393 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 9394 CurSizes, CurMapTypes, PartialStruct); 9395 if (CurBasePointers.empty()) 9396 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 9397 CurPointers, CurSizes, CurMapTypes); 9398 // Generate correct mapping for variables captured by reference in 9399 // lambdas. 9400 if (CI->capturesVariable()) 9401 MEHandler.generateInfoForLambdaCaptures( 9402 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 9403 CurMapTypes, LambdaPointers); 9404 } 9405 // We expect to have at least an element of information for this capture. 
9406 assert(!CurBasePointers.empty() && 9407 "Non-existing map pointer for capture!"); 9408 assert(CurBasePointers.size() == CurPointers.size() && 9409 CurBasePointers.size() == CurSizes.size() && 9410 CurBasePointers.size() == CurMapTypes.size() && 9411 "Inconsistent map information sizes!"); 9412 9413 // If there is an entry in PartialStruct it means we have a struct with 9414 // individual members mapped. Emit an extra combined entry. 9415 if (PartialStruct.Base.isValid()) 9416 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 9417 CurMapTypes, PartialStruct); 9418 9419 // We need to append the results of this capture to what we already have. 9420 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 9421 Pointers.append(CurPointers.begin(), CurPointers.end()); 9422 Sizes.append(CurSizes.begin(), CurSizes.end()); 9423 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 9424 } 9425 // Adjust MEMBER_OF flags for the lambdas captures. 9426 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 9427 Pointers, MapTypes); 9428 // Map other list items in the map clause which are not captured variables 9429 // but "declare target link" global variables. 9430 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 9431 MapTypes); 9432 9433 TargetDataInfo Info; 9434 // Fill up the arrays and create the arguments. 
9435 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9436 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9437 Info.PointersArray, Info.SizesArray, 9438 Info.MapTypesArray, Info); 9439 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9440 InputInfo.BasePointersArray = 9441 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9442 InputInfo.PointersArray = 9443 Address(Info.PointersArray, CGM.getPointerAlign()); 9444 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9445 MapTypesArray = Info.MapTypesArray; 9446 if (RequiresOuterTask) 9447 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9448 else 9449 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9450 }; 9451 9452 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9453 CodeGenFunction &CGF, PrePostActionTy &) { 9454 if (RequiresOuterTask) { 9455 CodeGenFunction::OMPTargetDataInfo InputInfo; 9456 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9457 } else { 9458 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9459 } 9460 }; 9461 9462 // If we have a target function ID it means that we need to support 9463 // offloading, otherwise, just execute on the host. We need to execute on host 9464 // regardless of the conditional in the if clause if, e.g., the user do not 9465 // specify target triples. 9466 if (OutlinedFnID) { 9467 if (IfCond) { 9468 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9469 } else { 9470 RegionCodeGenTy ThenRCG(TargetThenGen); 9471 ThenRCG(CGF); 9472 } 9473 } else { 9474 RegionCodeGenTy ElseRCG(TargetElseGen); 9475 ElseRCG(CGF); 9476 } 9477 } 9478 9479 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9480 StringRef ParentName) { 9481 if (!S) 9482 return; 9483 9484 // Codegen OMP target directives that offload compute to the device. 
9485 bool RequiresDeviceCodegen = 9486 isa<OMPExecutableDirective>(S) && 9487 isOpenMPTargetExecutionDirective( 9488 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9489 9490 if (RequiresDeviceCodegen) { 9491 const auto &E = *cast<OMPExecutableDirective>(S); 9492 unsigned DeviceID; 9493 unsigned FileID; 9494 unsigned Line; 9495 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9496 FileID, Line); 9497 9498 // Is this a target region that should not be emitted as an entry point? If 9499 // so just signal we are done with this target region. 9500 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9501 ParentName, Line)) 9502 return; 9503 9504 switch (E.getDirectiveKind()) { 9505 case OMPD_target: 9506 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9507 cast<OMPTargetDirective>(E)); 9508 break; 9509 case OMPD_target_parallel: 9510 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9511 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9512 break; 9513 case OMPD_target_teams: 9514 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9515 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9516 break; 9517 case OMPD_target_teams_distribute: 9518 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9519 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9520 break; 9521 case OMPD_target_teams_distribute_simd: 9522 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9523 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9524 break; 9525 case OMPD_target_parallel_for: 9526 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9527 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9528 break; 9529 case OMPD_target_parallel_for_simd: 9530 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9531 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9532 break; 9533 case OMPD_target_simd: 9534 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9535 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9536 break; 9537 case OMPD_target_teams_distribute_parallel_for: 9538 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9539 CGM, ParentName, 9540 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9541 break; 9542 case OMPD_target_teams_distribute_parallel_for_simd: 9543 CodeGenFunction:: 9544 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9545 CGM, ParentName, 9546 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9547 break; 9548 case OMPD_parallel: 9549 case OMPD_for: 9550 case OMPD_parallel_for: 9551 case OMPD_parallel_sections: 9552 case OMPD_for_simd: 9553 case OMPD_parallel_for_simd: 9554 case OMPD_cancel: 9555 case OMPD_cancellation_point: 9556 case OMPD_ordered: 9557 case OMPD_threadprivate: 9558 case OMPD_allocate: 9559 case OMPD_task: 9560 case OMPD_simd: 9561 case OMPD_sections: 9562 case OMPD_section: 9563 case OMPD_single: 9564 case OMPD_master: 9565 case OMPD_critical: 9566 case OMPD_taskyield: 9567 case OMPD_barrier: 9568 case OMPD_taskwait: 9569 case OMPD_taskgroup: 9570 case OMPD_atomic: 9571 case OMPD_flush: 9572 case OMPD_teams: 9573 case OMPD_target_data: 9574 case OMPD_target_exit_data: 9575 case OMPD_target_enter_data: 9576 case OMPD_distribute: 9577 case OMPD_distribute_simd: 9578 case OMPD_distribute_parallel_for: 9579 case OMPD_distribute_parallel_for_simd: 9580 case OMPD_teams_distribute: 9581 case OMPD_teams_distribute_simd: 9582 case OMPD_teams_distribute_parallel_for: 9583 case OMPD_teams_distribute_parallel_for_simd: 9584 case OMPD_target_update: 9585 case OMPD_declare_simd: 9586 case OMPD_declare_target: 9587 case OMPD_end_declare_target: 9588 case OMPD_declare_reduction: 9589 case OMPD_declare_mapper: 9590 case OMPD_taskloop: 9591 case OMPD_taskloop_simd: 9592 case OMPD_requires: 9593 case OMPD_unknown: 9594 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 
9595 } 9596 return; 9597 } 9598 9599 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9600 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9601 return; 9602 9603 scanForTargetRegionsFunctions( 9604 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9605 return; 9606 } 9607 9608 // If this is a lambda function, look into its body. 9609 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9610 S = L->getBody(); 9611 9612 // Keep looking for target regions recursively. 9613 for (const Stmt *II : S->children()) 9614 scanForTargetRegionsFunctions(II, ParentName); 9615 } 9616 9617 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9618 // If emitting code for the host, we do not process FD here. Instead we do 9619 // the normal code generation. 9620 if (!CGM.getLangOpts().OpenMPIsDevice) { 9621 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9622 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9623 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9624 // Do not emit device_type(nohost) functions for the host. 9625 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9626 return true; 9627 } 9628 return false; 9629 } 9630 9631 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9632 StringRef Name = CGM.getMangledName(GD); 9633 // Try to detect target regions in the function. 9634 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9635 scanForTargetRegionsFunctions(FD->getBody(), Name); 9636 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9637 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9638 // Do not emit device_type(nohost) functions for the host. 9639 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9640 return true; 9641 } 9642 9643 // Do not to emit function if it is not marked as declare target. 
9644 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9645 AlreadyEmittedTargetFunctions.count(Name) == 0; 9646 } 9647 9648 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9649 if (!CGM.getLangOpts().OpenMPIsDevice) 9650 return false; 9651 9652 // Check if there are Ctors/Dtors in this declaration and look for target 9653 // regions in it. We use the complete variant to produce the kernel name 9654 // mangling. 9655 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9656 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9657 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9658 StringRef ParentName = 9659 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9660 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9661 } 9662 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9663 StringRef ParentName = 9664 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9665 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9666 } 9667 } 9668 9669 // Do not to emit variable if it is not marked as declare target. 
9670 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9671 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9672 cast<VarDecl>(GD.getDecl())); 9673 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9674 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9675 HasRequiresUnifiedSharedMemory)) { 9676 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9677 return true; 9678 } 9679 return false; 9680 } 9681 9682 llvm::Constant * 9683 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9684 const VarDecl *VD) { 9685 assert(VD->getType().isConstant(CGM.getContext()) && 9686 "Expected constant variable."); 9687 StringRef VarName; 9688 llvm::Constant *Addr; 9689 llvm::GlobalValue::LinkageTypes Linkage; 9690 QualType Ty = VD->getType(); 9691 SmallString<128> Buffer; 9692 { 9693 unsigned DeviceID; 9694 unsigned FileID; 9695 unsigned Line; 9696 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9697 FileID, Line); 9698 llvm::raw_svector_ostream OS(Buffer); 9699 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9700 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9701 VarName = OS.str(); 9702 } 9703 Linkage = llvm::GlobalValue::InternalLinkage; 9704 Addr = 9705 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9706 getDefaultFirstprivateAddressSpace()); 9707 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9708 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9709 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9710 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9711 VarName, Addr, VarSize, 9712 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9713 return Addr; 9714 } 9715 9716 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9717 llvm::Constant *Addr) { 9718 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9719 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 
9720 if (!Res) { 9721 if (CGM.getLangOpts().OpenMPIsDevice) { 9722 // Register non-target variables being emitted in device code (debug info 9723 // may cause this). 9724 StringRef VarName = CGM.getMangledName(VD); 9725 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9726 } 9727 return; 9728 } 9729 // Register declare target variables. 9730 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9731 StringRef VarName; 9732 CharUnits VarSize; 9733 llvm::GlobalValue::LinkageTypes Linkage; 9734 9735 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9736 !HasRequiresUnifiedSharedMemory) { 9737 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9738 VarName = CGM.getMangledName(VD); 9739 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9740 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9741 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9742 } else { 9743 VarSize = CharUnits::Zero(); 9744 } 9745 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9746 // Temp solution to prevent optimizations of the internal variables. 
9747 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9748 std::string RefName = getName({VarName, "ref"}); 9749 if (!CGM.GetGlobalValue(RefName)) { 9750 llvm::Constant *AddrRef = 9751 getOrCreateInternalVariable(Addr->getType(), RefName); 9752 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9753 GVAddrRef->setConstant(/*Val=*/true); 9754 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9755 GVAddrRef->setInitializer(Addr); 9756 CGM.addCompilerUsedGlobal(GVAddrRef); 9757 } 9758 } 9759 } else { 9760 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9761 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9762 HasRequiresUnifiedSharedMemory)) && 9763 "Declare target attribute must link or to with unified memory."); 9764 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9765 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9766 else 9767 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9768 9769 if (CGM.getLangOpts().OpenMPIsDevice) { 9770 VarName = Addr->getName(); 9771 Addr = nullptr; 9772 } else { 9773 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9774 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9775 } 9776 VarSize = CGM.getPointerSize(); 9777 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9778 } 9779 9780 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9781 VarName, Addr, VarSize, Flags, Linkage); 9782 } 9783 9784 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9785 if (isa<FunctionDecl>(GD.getDecl()) || 9786 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9787 return emitTargetFunctions(GD); 9788 9789 return emitTargetGlobalVariable(GD); 9790 } 9791 9792 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9793 for (const VarDecl *VD : DeferredGlobalVariables) { 9794 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9795 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9796 if (!Res) 9797 continue; 9798 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9799 !HasRequiresUnifiedSharedMemory) { 9800 CGM.EmitGlobal(VD); 9801 } else { 9802 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9803 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9804 HasRequiresUnifiedSharedMemory)) && 9805 "Expected link clause or to clause with unified memory."); 9806 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9807 } 9808 } 9809 } 9810 9811 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9812 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9813 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9814 " Expected target-based directive."); 9815 } 9816 9817 void CGOpenMPRuntime::checkArchForUnifiedAddressing( 9818 const OMPRequiresDecl *D) { 9819 for (const OMPClause *Clause : D->clauselists()) { 9820 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9821 HasRequiresUnifiedSharedMemory = true; 9822 break; 9823 } 9824 } 9825 } 9826 9827 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9828 LangAS &AS) { 9829 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9830 return false; 9831 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9832 switch(A->getAllocatorType()) { 9833 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9834 // Not supported, fallback to the default mem space. 
9835 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9836 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9837 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9838 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9839 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9840 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9841 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9842 AS = LangAS::Default; 9843 return true; 9844 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9845 llvm_unreachable("Expected predefined allocator for the variables with the " 9846 "static storage."); 9847 } 9848 return false; 9849 } 9850 9851 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9852 return HasRequiresUnifiedSharedMemory; 9853 } 9854 9855 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9856 CodeGenModule &CGM) 9857 : CGM(CGM) { 9858 if (CGM.getLangOpts().OpenMPIsDevice) { 9859 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9860 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9861 } 9862 } 9863 9864 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9865 if (CGM.getLangOpts().OpenMPIsDevice) 9866 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9867 } 9868 9869 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9870 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9871 return true; 9872 9873 StringRef Name = CGM.getMangledName(GD); 9874 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9875 // Do not to emit function if it is marked as declare target as it was already 9876 // emitted. 
9877 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9878 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 9879 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 9880 return !F->isDeclaration(); 9881 return false; 9882 } 9883 return true; 9884 } 9885 9886 return !AlreadyEmittedTargetFunctions.insert(Name).second; 9887 } 9888 9889 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9890 // If we don't have entries or if we are emitting code for the device, we 9891 // don't need to do anything. 9892 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9893 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9894 (OffloadEntriesInfoManager.empty() && 9895 !HasEmittedDeclareTargetRegion && 9896 !HasEmittedTargetRegion)) 9897 return nullptr; 9898 9899 // Create and register the function that handles the requires directives. 9900 ASTContext &C = CGM.getContext(); 9901 9902 llvm::Function *RequiresRegFn; 9903 { 9904 CodeGenFunction CGF(CGM); 9905 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 9906 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 9907 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 9908 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 9909 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 9910 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 9911 // TODO: check for other requires clauses. 9912 // The requires directive takes effect only when a target region is 9913 // present in the compilation unit. Otherwise it is ignored and not 9914 // passed to the runtime. This avoids the runtime from throwing an error 9915 // for mismatching requires clauses across compilation units that don't 9916 // contain at least 1 target region. 
9917 assert((HasEmittedTargetRegion || 9918 HasEmittedDeclareTargetRegion || 9919 !OffloadEntriesInfoManager.empty()) && 9920 "Target or declare target region expected."); 9921 if (HasRequiresUnifiedSharedMemory) 9922 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 9923 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 9924 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 9925 CGF.FinishFunction(); 9926 } 9927 return RequiresRegFn; 9928 } 9929 9930 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 9931 // If we have offloading in the current module, we need to emit the entries 9932 // now and register the offloading descriptor. 9933 createOffloadEntriesAndInfoMetadata(); 9934 9935 // Create and register the offloading binary descriptors. This is the main 9936 // entity that captures all the information about offloading in the current 9937 // compilation unit. 9938 return createOffloadingBinaryDescriptorRegistration(); 9939 } 9940 9941 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9942 const OMPExecutableDirective &D, 9943 SourceLocation Loc, 9944 llvm::Function *OutlinedFn, 9945 ArrayRef<llvm::Value *> CapturedVars) { 9946 if (!CGF.HaveInsertPoint()) 9947 return; 9948 9949 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9950 CodeGenFunction::RunCleanupsScope Scope(CGF); 9951 9952 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9953 llvm::Value *Args[] = { 9954 RTLoc, 9955 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9956 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9957 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9958 RealArgs.append(std::begin(Args), std::end(Args)); 9959 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9960 9961 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9962 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9963 } 9964 9965 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9966 const Expr 
*NumTeams, 9967 const Expr *ThreadLimit, 9968 SourceLocation Loc) { 9969 if (!CGF.HaveInsertPoint()) 9970 return; 9971 9972 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9973 9974 llvm::Value *NumTeamsVal = 9975 NumTeams 9976 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9977 CGF.CGM.Int32Ty, /* isSigned = */ true) 9978 : CGF.Builder.getInt32(0); 9979 9980 llvm::Value *ThreadLimitVal = 9981 ThreadLimit 9982 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 9983 CGF.CGM.Int32Ty, /* isSigned = */ true) 9984 : CGF.Builder.getInt32(0); 9985 9986 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 9987 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 9988 ThreadLimitVal}; 9989 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 9990 PushNumTeamsArgs); 9991 } 9992 9993 void CGOpenMPRuntime::emitTargetDataCalls( 9994 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9995 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9996 if (!CGF.HaveInsertPoint()) 9997 return; 9998 9999 // Action used to replace the default codegen action and turn privatization 10000 // off. 10001 PrePostActionTy NoPrivAction; 10002 10003 // Generate the code for the opening of the data environment. Capture all the 10004 // arguments of the runtime call by reference because they are used in the 10005 // closing of the region. 10006 auto &&BeginThenGen = [this, &D, Device, &Info, 10007 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10008 // Fill up the arrays with all the mapped variables. 10009 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10010 MappableExprsHandler::MapValuesArrayTy Pointers; 10011 MappableExprsHandler::MapValuesArrayTy Sizes; 10012 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10013 10014 // Get map clause information. 
10015 MappableExprsHandler MCHandler(D, CGF); 10016 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10017 10018 // Fill up the arrays and create the arguments. 10019 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10020 10021 llvm::Value *BasePointersArrayArg = nullptr; 10022 llvm::Value *PointersArrayArg = nullptr; 10023 llvm::Value *SizesArrayArg = nullptr; 10024 llvm::Value *MapTypesArrayArg = nullptr; 10025 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10026 SizesArrayArg, MapTypesArrayArg, Info); 10027 10028 // Emit device ID if any. 10029 llvm::Value *DeviceID = nullptr; 10030 if (Device) { 10031 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10032 CGF.Int64Ty, /*isSigned=*/true); 10033 } else { 10034 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10035 } 10036 10037 // Emit the number of elements in the offloading arrays. 10038 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10039 10040 llvm::Value *OffloadingArgs[] = { 10041 DeviceID, PointerNum, BasePointersArrayArg, 10042 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10043 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10044 OffloadingArgs); 10045 10046 // If device pointer privatization is required, emit the body of the region 10047 // here. It will have to be duplicated: with and without privatization. 10048 if (!Info.CaptureDeviceAddrMap.empty()) 10049 CodeGen(CGF); 10050 }; 10051 10052 // Generate code for the closing of the data region. 
10053 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10054 PrePostActionTy &) { 10055 assert(Info.isValid() && "Invalid data environment closing arguments."); 10056 10057 llvm::Value *BasePointersArrayArg = nullptr; 10058 llvm::Value *PointersArrayArg = nullptr; 10059 llvm::Value *SizesArrayArg = nullptr; 10060 llvm::Value *MapTypesArrayArg = nullptr; 10061 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10062 SizesArrayArg, MapTypesArrayArg, Info); 10063 10064 // Emit device ID if any. 10065 llvm::Value *DeviceID = nullptr; 10066 if (Device) { 10067 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10068 CGF.Int64Ty, /*isSigned=*/true); 10069 } else { 10070 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10071 } 10072 10073 // Emit the number of elements in the offloading arrays. 10074 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10075 10076 llvm::Value *OffloadingArgs[] = { 10077 DeviceID, PointerNum, BasePointersArrayArg, 10078 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10079 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10080 OffloadingArgs); 10081 }; 10082 10083 // If we need device pointer privatization, we need to emit the body of the 10084 // region with no privatization in the 'else' branch of the conditional. 10085 // Otherwise, we don't have to do anything. 10086 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10087 PrePostActionTy &) { 10088 if (!Info.CaptureDeviceAddrMap.empty()) { 10089 CodeGen.setAction(NoPrivAction); 10090 CodeGen(CGF); 10091 } 10092 }; 10093 10094 // We don't have to do anything to close the region if the if clause evaluates 10095 // to false. 
10096 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10097 10098 if (IfCond) { 10099 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10100 } else { 10101 RegionCodeGenTy RCG(BeginThenGen); 10102 RCG(CGF); 10103 } 10104 10105 // If we don't require privatization of device pointers, we emit the body in 10106 // between the runtime calls. This avoids duplicating the body code. 10107 if (Info.CaptureDeviceAddrMap.empty()) { 10108 CodeGen.setAction(NoPrivAction); 10109 CodeGen(CGF); 10110 } 10111 10112 if (IfCond) { 10113 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10114 } else { 10115 RegionCodeGenTy RCG(EndThenGen); 10116 RCG(CGF); 10117 } 10118 } 10119 10120 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10121 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10122 const Expr *Device) { 10123 if (!CGF.HaveInsertPoint()) 10124 return; 10125 10126 assert((isa<OMPTargetEnterDataDirective>(D) || 10127 isa<OMPTargetExitDataDirective>(D) || 10128 isa<OMPTargetUpdateDirective>(D)) && 10129 "Expecting either target enter, exit data, or update directives."); 10130 10131 CodeGenFunction::OMPTargetDataInfo InputInfo; 10132 llvm::Value *MapTypesArray = nullptr; 10133 // Generate the code for the opening of the data environment. 10134 auto &&ThenGen = [this, &D, Device, &InputInfo, 10135 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10136 // Emit device ID if any. 10137 llvm::Value *DeviceID = nullptr; 10138 if (Device) { 10139 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10140 CGF.Int64Ty, /*isSigned=*/true); 10141 } else { 10142 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10143 } 10144 10145 // Emit the number of elements in the offloading arrays. 
10146 llvm::Constant *PointerNum = 10147 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10148 10149 llvm::Value *OffloadingArgs[] = {DeviceID, 10150 PointerNum, 10151 InputInfo.BasePointersArray.getPointer(), 10152 InputInfo.PointersArray.getPointer(), 10153 InputInfo.SizesArray.getPointer(), 10154 MapTypesArray}; 10155 10156 // Select the right runtime function call for each expected standalone 10157 // directive. 10158 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10159 OpenMPRTLFunction RTLFn; 10160 switch (D.getDirectiveKind()) { 10161 case OMPD_target_enter_data: 10162 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 10163 : OMPRTL__tgt_target_data_begin; 10164 break; 10165 case OMPD_target_exit_data: 10166 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10167 : OMPRTL__tgt_target_data_end; 10168 break; 10169 case OMPD_target_update: 10170 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10171 : OMPRTL__tgt_target_data_update; 10172 break; 10173 case OMPD_parallel: 10174 case OMPD_for: 10175 case OMPD_parallel_for: 10176 case OMPD_parallel_sections: 10177 case OMPD_for_simd: 10178 case OMPD_parallel_for_simd: 10179 case OMPD_cancel: 10180 case OMPD_cancellation_point: 10181 case OMPD_ordered: 10182 case OMPD_threadprivate: 10183 case OMPD_allocate: 10184 case OMPD_task: 10185 case OMPD_simd: 10186 case OMPD_sections: 10187 case OMPD_section: 10188 case OMPD_single: 10189 case OMPD_master: 10190 case OMPD_critical: 10191 case OMPD_taskyield: 10192 case OMPD_barrier: 10193 case OMPD_taskwait: 10194 case OMPD_taskgroup: 10195 case OMPD_atomic: 10196 case OMPD_flush: 10197 case OMPD_teams: 10198 case OMPD_target_data: 10199 case OMPD_distribute: 10200 case OMPD_distribute_simd: 10201 case OMPD_distribute_parallel_for: 10202 case OMPD_distribute_parallel_for_simd: 10203 case OMPD_teams_distribute: 10204 case OMPD_teams_distribute_simd: 10205 case OMPD_teams_distribute_parallel_for: 10206 case 
OMPD_teams_distribute_parallel_for_simd: 10207 case OMPD_declare_simd: 10208 case OMPD_declare_target: 10209 case OMPD_end_declare_target: 10210 case OMPD_declare_reduction: 10211 case OMPD_declare_mapper: 10212 case OMPD_taskloop: 10213 case OMPD_taskloop_simd: 10214 case OMPD_target: 10215 case OMPD_target_simd: 10216 case OMPD_target_teams_distribute: 10217 case OMPD_target_teams_distribute_simd: 10218 case OMPD_target_teams_distribute_parallel_for: 10219 case OMPD_target_teams_distribute_parallel_for_simd: 10220 case OMPD_target_teams: 10221 case OMPD_target_parallel: 10222 case OMPD_target_parallel_for: 10223 case OMPD_target_parallel_for_simd: 10224 case OMPD_requires: 10225 case OMPD_unknown: 10226 llvm_unreachable("Unexpected standalone target data directive."); 10227 break; 10228 } 10229 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10230 }; 10231 10232 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10233 CodeGenFunction &CGF, PrePostActionTy &) { 10234 // Fill up the arrays with all the mapped variables. 10235 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10236 MappableExprsHandler::MapValuesArrayTy Pointers; 10237 MappableExprsHandler::MapValuesArrayTy Sizes; 10238 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10239 10240 // Get map clause information. 10241 MappableExprsHandler MEHandler(D, CGF); 10242 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10243 10244 TargetDataInfo Info; 10245 // Fill up the arrays and create the arguments. 
10246 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10247 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10248 Info.PointersArray, Info.SizesArray, 10249 Info.MapTypesArray, Info); 10250 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10251 InputInfo.BasePointersArray = 10252 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10253 InputInfo.PointersArray = 10254 Address(Info.PointersArray, CGM.getPointerAlign()); 10255 InputInfo.SizesArray = 10256 Address(Info.SizesArray, CGM.getPointerAlign()); 10257 MapTypesArray = Info.MapTypesArray; 10258 if (D.hasClausesOfKind<OMPDependClause>()) 10259 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10260 else 10261 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10262 }; 10263 10264 if (IfCond) { 10265 emitOMPIfClause(CGF, IfCond, TargetThenGen, 10266 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10267 } else { 10268 RegionCodeGenTy ThenRCG(TargetThenGen); 10269 ThenRCG(CGF); 10270 } 10271 } 10272 10273 namespace { 10274 /// Kind of parameter in a function with 'declare simd' directive. 10275 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10276 /// Attribute set of the parameter. 10277 struct ParamAttrTy { 10278 ParamKindTy Kind = Vector; 10279 llvm::APSInt StrideOrArg; 10280 llvm::APSInt Alignment; 10281 }; 10282 } // namespace 10283 10284 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10285 ArrayRef<ParamAttrTy> ParamAttrs) { 10286 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10287 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10288 // of that clause. The VLEN value must be power of 2. 10289 // In other case the notion of the function`s "characteristic data type" (CDT) 10290 // is used to compute the vector length. 10291 // CDT is defined in the following order: 10292 // a) For non-void function, the CDT is the return type. 
10293 // b) If the function has any non-uniform, non-linear parameters, then the 10294 // CDT is the type of the first such parameter. 10295 // c) If the CDT determined by a) or b) above is struct, union, or class 10296 // type which is pass-by-value (except for the type that maps to the 10297 // built-in complex data type), the characteristic data type is int. 10298 // d) If none of the above three cases is applicable, the CDT is int. 10299 // The VLEN is then determined based on the CDT and the size of vector 10300 // register of that ISA for which current vector version is generated. The 10301 // VLEN is computed using the formula below: 10302 // VLEN = sizeof(vector_register) / sizeof(CDT), 10303 // where vector register size specified in section 3.2.1 Registers and the 10304 // Stack Frame of original AMD64 ABI document. 10305 QualType RetType = FD->getReturnType(); 10306 if (RetType.isNull()) 10307 return 0; 10308 ASTContext &C = FD->getASTContext(); 10309 QualType CDT; 10310 if (!RetType.isNull() && !RetType->isVoidType()) { 10311 CDT = RetType; 10312 } else { 10313 unsigned Offset = 0; 10314 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10315 if (ParamAttrs[Offset].Kind == Vector) 10316 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10317 ++Offset; 10318 } 10319 if (CDT.isNull()) { 10320 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10321 if (ParamAttrs[I + Offset].Kind == Vector) { 10322 CDT = FD->getParamDecl(I)->getType(); 10323 break; 10324 } 10325 } 10326 } 10327 } 10328 if (CDT.isNull()) 10329 CDT = C.IntTy; 10330 CDT = CDT->getCanonicalTypeUnqualified(); 10331 if (CDT->isRecordType() || CDT->isUnionType()) 10332 CDT = C.IntTy; 10333 return C.getTypeSize(CDT); 10334 } 10335 10336 static void 10337 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10338 const llvm::APSInt &VLENVal, 10339 ArrayRef<ParamAttrTy> ParamAttrs, 10340 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10341 struct ISADataTy { 
10342 char ISA; 10343 unsigned VecRegSize; 10344 }; 10345 ISADataTy ISAData[] = { 10346 { 10347 'b', 128 10348 }, // SSE 10349 { 10350 'c', 256 10351 }, // AVX 10352 { 10353 'd', 256 10354 }, // AVX2 10355 { 10356 'e', 512 10357 }, // AVX512 10358 }; 10359 llvm::SmallVector<char, 2> Masked; 10360 switch (State) { 10361 case OMPDeclareSimdDeclAttr::BS_Undefined: 10362 Masked.push_back('N'); 10363 Masked.push_back('M'); 10364 break; 10365 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10366 Masked.push_back('N'); 10367 break; 10368 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10369 Masked.push_back('M'); 10370 break; 10371 } 10372 for (char Mask : Masked) { 10373 for (const ISADataTy &Data : ISAData) { 10374 SmallString<256> Buffer; 10375 llvm::raw_svector_ostream Out(Buffer); 10376 Out << "_ZGV" << Data.ISA << Mask; 10377 if (!VLENVal) { 10378 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10379 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10380 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10381 } else { 10382 Out << VLENVal; 10383 } 10384 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10385 switch (ParamAttr.Kind){ 10386 case LinearWithVarStride: 10387 Out << 's' << ParamAttr.StrideOrArg; 10388 break; 10389 case Linear: 10390 Out << 'l'; 10391 if (!!ParamAttr.StrideOrArg) 10392 Out << ParamAttr.StrideOrArg; 10393 break; 10394 case Uniform: 10395 Out << 'u'; 10396 break; 10397 case Vector: 10398 Out << 'v'; 10399 break; 10400 } 10401 if (!!ParamAttr.Alignment) 10402 Out << 'a' << ParamAttr.Alignment; 10403 } 10404 Out << '_' << Fn->getName(); 10405 Fn->addFnAttr(Out.str()); 10406 } 10407 } 10408 } 10409 10410 // This are the Functions that are needed to mangle the name of the 10411 // vector functions generated by the compiler, according to the rules 10412 // defined in the "Vector Function ABI specifications for AArch64", 10413 // available at 10414 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10415 10416 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10417 /// 10418 /// TODO: Need to implement the behavior for reference marked with a 10419 /// var or no linear modifiers (1.b in the section). For this, we 10420 /// need to extend ParamKindTy to support the linear modifiers. 10421 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10422 QT = QT.getCanonicalType(); 10423 10424 if (QT->isVoidType()) 10425 return false; 10426 10427 if (Kind == ParamKindTy::Uniform) 10428 return false; 10429 10430 if (Kind == ParamKindTy::Linear) 10431 return false; 10432 10433 // TODO: Handle linear references with modifiers 10434 10435 if (Kind == ParamKindTy::LinearWithVarStride) 10436 return false; 10437 10438 return true; 10439 } 10440 10441 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10442 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10443 QT = QT.getCanonicalType(); 10444 unsigned Size = C.getTypeSize(QT); 10445 10446 // Only scalars and complex within 16 bytes wide set PVB to true. 10447 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10448 return false; 10449 10450 if (QT->isFloatingType()) 10451 return true; 10452 10453 if (QT->isIntegerType()) 10454 return true; 10455 10456 if (QT->isPointerType()) 10457 return true; 10458 10459 // TODO: Add support for complex types (section 3.1.2, item 2). 10460 10461 return false; 10462 } 10463 10464 /// Computes the lane size (LS) of a return type or of an input parameter, 10465 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10466 /// TODO: Add support for references, section 3.2.1, item 1. 
10467 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10468 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10469 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10470 if (getAArch64PBV(PTy, C)) 10471 return C.getTypeSize(PTy); 10472 } 10473 if (getAArch64PBV(QT, C)) 10474 return C.getTypeSize(QT); 10475 10476 return C.getTypeSize(C.getUIntPtrType()); 10477 } 10478 10479 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10480 // signature of the scalar function, as defined in 3.2.2 of the 10481 // AAVFABI. 10482 static std::tuple<unsigned, unsigned, bool> 10483 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10484 QualType RetType = FD->getReturnType().getCanonicalType(); 10485 10486 ASTContext &C = FD->getASTContext(); 10487 10488 bool OutputBecomesInput = false; 10489 10490 llvm::SmallVector<unsigned, 8> Sizes; 10491 if (!RetType->isVoidType()) { 10492 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10493 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10494 OutputBecomesInput = true; 10495 } 10496 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10497 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10498 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10499 } 10500 10501 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10502 // The LS of a function parameter / return value can only be a power 10503 // of 2, starting from 8 bits, up to 128. 
10504 assert(std::all_of(Sizes.begin(), Sizes.end(), 10505 [](unsigned Size) { 10506 return Size == 8 || Size == 16 || Size == 32 || 10507 Size == 64 || Size == 128; 10508 }) && 10509 "Invalid size"); 10510 10511 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10512 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10513 OutputBecomesInput); 10514 } 10515 10516 /// Mangle the parameter part of the vector function name according to 10517 /// their OpenMP classification. The mangling function is defined in 10518 /// section 3.5 of the AAVFABI. 10519 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10520 SmallString<256> Buffer; 10521 llvm::raw_svector_ostream Out(Buffer); 10522 for (const auto &ParamAttr : ParamAttrs) { 10523 switch (ParamAttr.Kind) { 10524 case LinearWithVarStride: 10525 Out << "ls" << ParamAttr.StrideOrArg; 10526 break; 10527 case Linear: 10528 Out << 'l'; 10529 // Don't print the step value if it is not present or if it is 10530 // equal to 1. 10531 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10532 Out << ParamAttr.StrideOrArg; 10533 break; 10534 case Uniform: 10535 Out << 'u'; 10536 break; 10537 case Vector: 10538 Out << 'v'; 10539 break; 10540 } 10541 10542 if (!!ParamAttr.Alignment) 10543 Out << 'a' << ParamAttr.Alignment; 10544 } 10545 10546 return Out.str(); 10547 } 10548 10549 // Function used to add the attribute. The parameter `VLEN` is 10550 // templated to allow the use of "x" when targeting scalable functions 10551 // for SVE. 
10552 template <typename T> 10553 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10554 char ISA, StringRef ParSeq, 10555 StringRef MangledName, bool OutputBecomesInput, 10556 llvm::Function *Fn) { 10557 SmallString<256> Buffer; 10558 llvm::raw_svector_ostream Out(Buffer); 10559 Out << Prefix << ISA << LMask << VLEN; 10560 if (OutputBecomesInput) 10561 Out << "v"; 10562 Out << ParSeq << "_" << MangledName; 10563 Fn->addFnAttr(Out.str()); 10564 } 10565 10566 // Helper function to generate the Advanced SIMD names depending on 10567 // the value of the NDS when simdlen is not present. 10568 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10569 StringRef Prefix, char ISA, 10570 StringRef ParSeq, StringRef MangledName, 10571 bool OutputBecomesInput, 10572 llvm::Function *Fn) { 10573 switch (NDS) { 10574 case 8: 10575 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10576 OutputBecomesInput, Fn); 10577 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10578 OutputBecomesInput, Fn); 10579 break; 10580 case 16: 10581 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10582 OutputBecomesInput, Fn); 10583 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10584 OutputBecomesInput, Fn); 10585 break; 10586 case 32: 10587 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10588 OutputBecomesInput, Fn); 10589 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10590 OutputBecomesInput, Fn); 10591 break; 10592 case 64: 10593 case 128: 10594 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10595 OutputBecomesInput, Fn); 10596 break; 10597 default: 10598 llvm_unreachable("Scalar type is too wide."); 10599 } 10600 } 10601 10602 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10603 static void emitAArch64DeclareSimdFunction( 10604 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10605 ArrayRef<ParamAttrTy> ParamAttrs, 10606 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10607 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10608 10609 // Get basic data for building the vector signature. 10610 const auto Data = getNDSWDS(FD, ParamAttrs); 10611 const unsigned NDS = std::get<0>(Data); 10612 const unsigned WDS = std::get<1>(Data); 10613 const bool OutputBecomesInput = std::get<2>(Data); 10614 10615 // Check the values provided via `simdlen` by the user. 10616 // 1. A `simdlen(1)` doesn't produce vector signatures, 10617 if (UserVLEN == 1) { 10618 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10619 DiagnosticsEngine::Warning, 10620 "The clause simdlen(1) has no effect when targeting aarch64."); 10621 CGM.getDiags().Report(SLoc, DiagID); 10622 return; 10623 } 10624 10625 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10626 // Advanced SIMD output. 10627 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10628 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10629 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10630 "power of 2 when targeting Advanced SIMD."); 10631 CGM.getDiags().Report(SLoc, DiagID); 10632 return; 10633 } 10634 10635 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10636 // limits. 10637 if (ISA == 's' && UserVLEN != 0) { 10638 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10639 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10640 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10641 "lanes in the architectural constraints " 10642 "for SVE (min is 128-bit, max is " 10643 "2048-bit, by steps of 128-bit)"); 10644 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10645 return; 10646 } 10647 } 10648 10649 // Sort out parameter sequence. 
10650 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10651 StringRef Prefix = "_ZGV"; 10652 // Generate simdlen from user input (if any). 10653 if (UserVLEN) { 10654 if (ISA == 's') { 10655 // SVE generates only a masked function. 10656 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10657 OutputBecomesInput, Fn); 10658 } else { 10659 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10660 // Advanced SIMD generates one or two functions, depending on 10661 // the `[not]inbranch` clause. 10662 switch (State) { 10663 case OMPDeclareSimdDeclAttr::BS_Undefined: 10664 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10665 OutputBecomesInput, Fn); 10666 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10667 OutputBecomesInput, Fn); 10668 break; 10669 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10670 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10671 OutputBecomesInput, Fn); 10672 break; 10673 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10674 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10675 OutputBecomesInput, Fn); 10676 break; 10677 } 10678 } 10679 } else { 10680 // If no user simdlen is provided, follow the AAVFABI rules for 10681 // generating the vector length. 10682 if (ISA == 's') { 10683 // SVE, section 3.4.1, item 1. 10684 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10685 OutputBecomesInput, Fn); 10686 } else { 10687 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10688 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10689 // two vector names depending on the use of the clause 10690 // `[not]inbranch`. 
10691 switch (State) { 10692 case OMPDeclareSimdDeclAttr::BS_Undefined: 10693 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10694 OutputBecomesInput, Fn); 10695 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10696 OutputBecomesInput, Fn); 10697 break; 10698 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10699 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10700 OutputBecomesInput, Fn); 10701 break; 10702 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10703 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10704 OutputBecomesInput, Fn); 10705 break; 10706 } 10707 } 10708 } 10709 } 10710 10711 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10712 llvm::Function *Fn) { 10713 ASTContext &C = CGM.getContext(); 10714 FD = FD->getMostRecentDecl(); 10715 // Map params to their positions in function decl. 10716 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10717 if (isa<CXXMethodDecl>(FD)) 10718 ParamPositions.try_emplace(FD, 0); 10719 unsigned ParamPos = ParamPositions.size(); 10720 for (const ParmVarDecl *P : FD->parameters()) { 10721 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10722 ++ParamPos; 10723 } 10724 while (FD) { 10725 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10726 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10727 // Mark uniform parameters. 10728 for (const Expr *E : Attr->uniforms()) { 10729 E = E->IgnoreParenImpCasts(); 10730 unsigned Pos; 10731 if (isa<CXXThisExpr>(E)) { 10732 Pos = ParamPositions[FD]; 10733 } else { 10734 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10735 ->getCanonicalDecl(); 10736 Pos = ParamPositions[PVD]; 10737 } 10738 ParamAttrs[Pos].Kind = Uniform; 10739 } 10740 // Get alignment info. 
10741 auto NI = Attr->alignments_begin(); 10742 for (const Expr *E : Attr->aligneds()) { 10743 E = E->IgnoreParenImpCasts(); 10744 unsigned Pos; 10745 QualType ParmTy; 10746 if (isa<CXXThisExpr>(E)) { 10747 Pos = ParamPositions[FD]; 10748 ParmTy = E->getType(); 10749 } else { 10750 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10751 ->getCanonicalDecl(); 10752 Pos = ParamPositions[PVD]; 10753 ParmTy = PVD->getType(); 10754 } 10755 ParamAttrs[Pos].Alignment = 10756 (*NI) 10757 ? (*NI)->EvaluateKnownConstInt(C) 10758 : llvm::APSInt::getUnsigned( 10759 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10760 .getQuantity()); 10761 ++NI; 10762 } 10763 // Mark linear parameters. 10764 auto SI = Attr->steps_begin(); 10765 auto MI = Attr->modifiers_begin(); 10766 for (const Expr *E : Attr->linears()) { 10767 E = E->IgnoreParenImpCasts(); 10768 unsigned Pos; 10769 if (isa<CXXThisExpr>(E)) { 10770 Pos = ParamPositions[FD]; 10771 } else { 10772 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10773 ->getCanonicalDecl(); 10774 Pos = ParamPositions[PVD]; 10775 } 10776 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10777 ParamAttr.Kind = Linear; 10778 if (*SI) { 10779 Expr::EvalResult Result; 10780 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10781 if (const auto *DRE = 10782 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10783 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10784 ParamAttr.Kind = LinearWithVarStride; 10785 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10786 ParamPositions[StridePVD->getCanonicalDecl()]); 10787 } 10788 } 10789 } else { 10790 ParamAttr.StrideOrArg = Result.Val.getInt(); 10791 } 10792 } 10793 ++SI; 10794 ++MI; 10795 } 10796 llvm::APSInt VLENVal; 10797 SourceLocation ExprLoc; 10798 const Expr *VLENExpr = Attr->getSimdlen(); 10799 if (VLENExpr) { 10800 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10801 ExprLoc = VLENExpr->getExprLoc(); 10802 } 10803 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10804 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10805 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10806 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10807 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10808 unsigned VLEN = VLENVal.getExtValue(); 10809 StringRef MangledName = Fn->getName(); 10810 if (CGM.getTarget().hasFeature("sve")) 10811 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10812 MangledName, 's', 128, Fn, ExprLoc); 10813 if (CGM.getTarget().hasFeature("neon")) 10814 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10815 MangledName, 'n', 128, Fn, ExprLoc); 10816 } 10817 } 10818 FD = FD->getPreviousDecl(); 10819 } 10820 } 10821 10822 namespace { 10823 /// Cleanup action for doacross support. 10824 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10825 public: 10826 static const int DoacrossFinArgs = 2; 10827 10828 private: 10829 llvm::FunctionCallee RTLFn; 10830 llvm::Value *Args[DoacrossFinArgs]; 10831 10832 public: 10833 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10834 ArrayRef<llvm::Value *> CallArgs) 10835 : RTLFn(RTLFn) { 10836 assert(CallArgs.size() == DoacrossFinArgs); 10837 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10838 } 10839 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10840 if (!CGF.HaveInsertPoint()) 10841 return; 10842 CGF.EmitRuntimeCall(RTLFn, Args); 10843 } 10844 }; 10845 } // namespace 10846 10847 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10848 const OMPLoopDirective &D, 10849 ArrayRef<Expr *> NumIterations) { 10850 if (!CGF.HaveInsertPoint()) 10851 return; 10852 10853 ASTContext &C = CGM.getContext(); 10854 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10855 RecordDecl *RD; 10856 if (KmpDimTy.isNull()) { 10857 // Build struct kmp_dim { // loop bounds info casted to 
kmp_int64 10858 // kmp_int64 lo; // lower 10859 // kmp_int64 up; // upper 10860 // kmp_int64 st; // stride 10861 // }; 10862 RD = C.buildImplicitRecord("kmp_dim"); 10863 RD->startDefinition(); 10864 addFieldToRecordDecl(C, RD, Int64Ty); 10865 addFieldToRecordDecl(C, RD, Int64Ty); 10866 addFieldToRecordDecl(C, RD, Int64Ty); 10867 RD->completeDefinition(); 10868 KmpDimTy = C.getRecordType(RD); 10869 } else { 10870 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10871 } 10872 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10873 QualType ArrayTy = 10874 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); 10875 10876 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10877 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10878 enum { LowerFD = 0, UpperFD, StrideFD }; 10879 // Fill dims with data. 10880 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10881 LValue DimsLVal = CGF.MakeAddrLValue( 10882 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10883 // dims.upper = num_iterations; 10884 LValue UpperLVal = CGF.EmitLValueForField( 10885 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10886 llvm::Value *NumIterVal = 10887 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10888 D.getNumIterations()->getType(), Int64Ty, 10889 D.getNumIterations()->getExprLoc()); 10890 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10891 // dims.stride = 1; 10892 LValue StrideLVal = CGF.EmitLValueForField( 10893 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10894 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10895 StrideLVal); 10896 } 10897 10898 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10899 // kmp_int32 num_dims, struct kmp_dim * dims); 10900 llvm::Value *Args[] = { 10901 emitUpdateLocation(CGF, D.getBeginLoc()), 10902 getThreadID(CGF, D.getBeginLoc()), 10903 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10904 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10905 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10906 CGM.VoidPtrTy)}; 10907 10908 llvm::FunctionCallee RTLFn = 10909 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10910 CGF.EmitRuntimeCall(RTLFn, Args); 10911 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10912 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10913 llvm::FunctionCallee FiniRTLFn = 10914 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10915 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10916 llvm::makeArrayRef(FiniArgs)); 10917 } 10918 10919 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10920 const OMPDependClause *C) { 10921 QualType Int64Ty = 10922 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10923 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10924 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10925 Int64Ty, Size, ArrayType::Normal, 0); 10926 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10927 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10928 const Expr *CounterVal = C->getLoopData(I); 10929 assert(CounterVal); 10930 llvm::Value *CntVal = CGF.EmitScalarConversion( 10931 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10932 CounterVal->getExprLoc()); 10933 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10934 /*Volatile=*/false, Int64Ty); 10935 } 10936 llvm::Value *Args[] = { 10937 emitUpdateLocation(CGF, C->getBeginLoc()), 10938 getThreadID(CGF, C->getBeginLoc()), 10939 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10940 llvm::FunctionCallee RTLFn; 10941 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10942 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10943 } else { 10944 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10945 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10946 } 
10947 CGF.EmitRuntimeCall(RTLFn, Args); 10948 } 10949 10950 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10951 llvm::FunctionCallee Callee, 10952 ArrayRef<llvm::Value *> Args) const { 10953 assert(Loc.isValid() && "Outlined function call location must be valid."); 10954 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10955 10956 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10957 if (Fn->doesNotThrow()) { 10958 CGF.EmitNounwindRuntimeCall(Fn, Args); 10959 return; 10960 } 10961 } 10962 CGF.EmitRuntimeCall(Callee, Args); 10963 } 10964 10965 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10966 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10967 ArrayRef<llvm::Value *> Args) const { 10968 emitCall(CGF, Loc, OutlinedFn, Args); 10969 } 10970 10971 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 10972 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 10973 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 10974 HasEmittedDeclareTargetRegion = true; 10975 } 10976 10977 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10978 const VarDecl *NativeParam, 10979 const VarDecl *TargetParam) const { 10980 return CGF.GetAddrOfLocalVar(NativeParam); 10981 } 10982 10983 namespace { 10984 /// Cleanup action for allocate support. 
10985 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10986 public: 10987 static const int CleanupArgs = 3; 10988 10989 private: 10990 llvm::FunctionCallee RTLFn; 10991 llvm::Value *Args[CleanupArgs]; 10992 10993 public: 10994 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 10995 ArrayRef<llvm::Value *> CallArgs) 10996 : RTLFn(RTLFn) { 10997 assert(CallArgs.size() == CleanupArgs && 10998 "Size of arguments does not match."); 10999 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11000 } 11001 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11002 if (!CGF.HaveInsertPoint()) 11003 return; 11004 CGF.EmitRuntimeCall(RTLFn, Args); 11005 } 11006 }; 11007 } // namespace 11008 11009 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11010 const VarDecl *VD) { 11011 if (!VD) 11012 return Address::invalid(); 11013 const VarDecl *CVD = VD->getCanonicalDecl(); 11014 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11015 return Address::invalid(); 11016 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11017 // Use the default allocation. 
11018 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 11019 !AA->getAllocator()) 11020 return Address::invalid(); 11021 llvm::Value *Size; 11022 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11023 if (CVD->getType()->isVariablyModifiedType()) { 11024 Size = CGF.getTypeSize(CVD->getType()); 11025 // Align the size: ((size + align - 1) / align) * align 11026 Size = CGF.Builder.CreateNUWAdd( 11027 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11028 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11029 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11030 } else { 11031 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11032 Size = CGM.getSize(Sz.alignTo(Align)); 11033 } 11034 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11035 assert(AA->getAllocator() && 11036 "Expected allocator expression for non-default allocator."); 11037 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11038 // According to the standard, the original allocator type is a enum (integer). 11039 // Convert to pointer type, if required. 
11040 if (Allocator->getType()->isIntegerTy()) 11041 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11042 else if (Allocator->getType()->isPointerTy()) 11043 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11044 CGM.VoidPtrTy); 11045 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11046 11047 llvm::Value *Addr = 11048 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11049 CVD->getName() + ".void.addr"); 11050 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11051 Allocator}; 11052 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11053 11054 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11055 llvm::makeArrayRef(FiniArgs)); 11056 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11057 Addr, 11058 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11059 CVD->getName() + ".addr"); 11060 return Address(Addr, Align); 11061 } 11062 11063 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11064 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11065 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11066 llvm_unreachable("Not supported in SIMD-only mode"); 11067 } 11068 11069 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11070 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11071 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11072 llvm_unreachable("Not supported in SIMD-only mode"); 11073 } 11074 11075 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11076 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11077 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11078 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11079 bool Tied, unsigned &NumberOfParts) { 11080 llvm_unreachable("Not supported in SIMD-only mode"); 11081 } 11082 11083 void 
CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11084 SourceLocation Loc, 11085 llvm::Function *OutlinedFn, 11086 ArrayRef<llvm::Value *> CapturedVars, 11087 const Expr *IfCond) { 11088 llvm_unreachable("Not supported in SIMD-only mode"); 11089 } 11090 11091 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11092 CodeGenFunction &CGF, StringRef CriticalName, 11093 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11094 const Expr *Hint) { 11095 llvm_unreachable("Not supported in SIMD-only mode"); 11096 } 11097 11098 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11099 const RegionCodeGenTy &MasterOpGen, 11100 SourceLocation Loc) { 11101 llvm_unreachable("Not supported in SIMD-only mode"); 11102 } 11103 11104 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11105 SourceLocation Loc) { 11106 llvm_unreachable("Not supported in SIMD-only mode"); 11107 } 11108 11109 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11110 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11111 SourceLocation Loc) { 11112 llvm_unreachable("Not supported in SIMD-only mode"); 11113 } 11114 11115 void CGOpenMPSIMDRuntime::emitSingleRegion( 11116 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11117 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11118 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11119 ArrayRef<const Expr *> AssignmentOps) { 11120 llvm_unreachable("Not supported in SIMD-only mode"); 11121 } 11122 11123 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11124 const RegionCodeGenTy &OrderedOpGen, 11125 SourceLocation Loc, 11126 bool IsThreads) { 11127 llvm_unreachable("Not supported in SIMD-only mode"); 11128 } 11129 11130 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11131 SourceLocation Loc, 11132 OpenMPDirectiveKind Kind, 11133 bool EmitChecks, 11134 bool ForceSimpleCall) { 11135 llvm_unreachable("Not supported in SIMD-only mode"); 11136 } 
11137 11138 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11139 CodeGenFunction &CGF, SourceLocation Loc, 11140 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11141 bool Ordered, const DispatchRTInput &DispatchValues) { 11142 llvm_unreachable("Not supported in SIMD-only mode"); 11143 } 11144 11145 void CGOpenMPSIMDRuntime::emitForStaticInit( 11146 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11147 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11148 llvm_unreachable("Not supported in SIMD-only mode"); 11149 } 11150 11151 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11152 CodeGenFunction &CGF, SourceLocation Loc, 11153 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 11154 llvm_unreachable("Not supported in SIMD-only mode"); 11155 } 11156 11157 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11158 SourceLocation Loc, 11159 unsigned IVSize, 11160 bool IVSigned) { 11161 llvm_unreachable("Not supported in SIMD-only mode"); 11162 } 11163 11164 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11165 SourceLocation Loc, 11166 OpenMPDirectiveKind DKind) { 11167 llvm_unreachable("Not supported in SIMD-only mode"); 11168 } 11169 11170 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11171 SourceLocation Loc, 11172 unsigned IVSize, bool IVSigned, 11173 Address IL, Address LB, 11174 Address UB, Address ST) { 11175 llvm_unreachable("Not supported in SIMD-only mode"); 11176 } 11177 11178 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11179 llvm::Value *NumThreads, 11180 SourceLocation Loc) { 11181 llvm_unreachable("Not supported in SIMD-only mode"); 11182 } 11183 11184 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11185 OpenMPProcBindClauseKind ProcBind, 11186 SourceLocation Loc) { 11187 llvm_unreachable("Not supported in SIMD-only mode"); 11188 } 11189 11190 Address 
CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11191 const VarDecl *VD, 11192 Address VDAddr, 11193 SourceLocation Loc) { 11194 llvm_unreachable("Not supported in SIMD-only mode"); 11195 } 11196 11197 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11198 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11199 CodeGenFunction *CGF) { 11200 llvm_unreachable("Not supported in SIMD-only mode"); 11201 } 11202 11203 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11204 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11205 llvm_unreachable("Not supported in SIMD-only mode"); 11206 } 11207 11208 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11209 ArrayRef<const Expr *> Vars, 11210 SourceLocation Loc) { 11211 llvm_unreachable("Not supported in SIMD-only mode"); 11212 } 11213 11214 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11215 const OMPExecutableDirective &D, 11216 llvm::Function *TaskFunction, 11217 QualType SharedsTy, Address Shareds, 11218 const Expr *IfCond, 11219 const OMPTaskDataTy &Data) { 11220 llvm_unreachable("Not supported in SIMD-only mode"); 11221 } 11222 11223 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11224 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11225 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11226 const Expr *IfCond, const OMPTaskDataTy &Data) { 11227 llvm_unreachable("Not supported in SIMD-only mode"); 11228 } 11229 11230 void CGOpenMPSIMDRuntime::emitReduction( 11231 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11232 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11233 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11234 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11235 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11236 ReductionOps, 
Options); 11237 } 11238 11239 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11240 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11241 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11242 llvm_unreachable("Not supported in SIMD-only mode"); 11243 } 11244 11245 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11246 SourceLocation Loc, 11247 ReductionCodeGen &RCG, 11248 unsigned N) { 11249 llvm_unreachable("Not supported in SIMD-only mode"); 11250 } 11251 11252 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11253 SourceLocation Loc, 11254 llvm::Value *ReductionsPtr, 11255 LValue SharedLVal) { 11256 llvm_unreachable("Not supported in SIMD-only mode"); 11257 } 11258 11259 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11260 SourceLocation Loc) { 11261 llvm_unreachable("Not supported in SIMD-only mode"); 11262 } 11263 11264 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11265 CodeGenFunction &CGF, SourceLocation Loc, 11266 OpenMPDirectiveKind CancelRegion) { 11267 llvm_unreachable("Not supported in SIMD-only mode"); 11268 } 11269 11270 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11271 SourceLocation Loc, const Expr *IfCond, 11272 OpenMPDirectiveKind CancelRegion) { 11273 llvm_unreachable("Not supported in SIMD-only mode"); 11274 } 11275 11276 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11277 const OMPExecutableDirective &D, StringRef ParentName, 11278 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11279 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11280 llvm_unreachable("Not supported in SIMD-only mode"); 11281 } 11282 11283 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, 11284 const OMPExecutableDirective &D, 11285 llvm::Function *OutlinedFn, 11286 llvm::Value *OutlinedFnID, 11287 const Expr *IfCond, 11288 const Expr *Device) { 11289 llvm_unreachable("Not supported in SIMD-only 
mode"); 11290 } 11291 11292 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 11293 llvm_unreachable("Not supported in SIMD-only mode"); 11294 } 11295 11296 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 11297 llvm_unreachable("Not supported in SIMD-only mode"); 11298 } 11299 11300 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 11301 return false; 11302 } 11303 11304 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { 11305 return nullptr; 11306 } 11307 11308 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 11309 const OMPExecutableDirective &D, 11310 SourceLocation Loc, 11311 llvm::Function *OutlinedFn, 11312 ArrayRef<llvm::Value *> CapturedVars) { 11313 llvm_unreachable("Not supported in SIMD-only mode"); 11314 } 11315 11316 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11317 const Expr *NumTeams, 11318 const Expr *ThreadLimit, 11319 SourceLocation Loc) { 11320 llvm_unreachable("Not supported in SIMD-only mode"); 11321 } 11322 11323 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 11324 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11325 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11326 llvm_unreachable("Not supported in SIMD-only mode"); 11327 } 11328 11329 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 11330 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11331 const Expr *Device) { 11332 llvm_unreachable("Not supported in SIMD-only mode"); 11333 } 11334 11335 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11336 const OMPLoopDirective &D, 11337 ArrayRef<Expr *> NumIterations) { 11338 llvm_unreachable("Not supported in SIMD-only mode"); 11339 } 11340 11341 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11342 const OMPDependClause *C) { 11343 llvm_unreachable("Not supported in SIMD-only mode"); 11344 } 11345 11346 const VarDecl * 11347 
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 11348 const VarDecl *NativeParam) const { 11349 llvm_unreachable("Not supported in SIMD-only mode"); 11350 } 11351 11352 Address 11353 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 11354 const VarDecl *NativeParam, 11355 const VarDecl *TargetParam) const { 11356 llvm_unreachable("Not supported in SIMD-only mode"); 11357 } 11358