//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Instances are installed as CodeGenFunction::CapturedStmtInfo while the
/// body of an OpenMP construct is being emitted, so that variable lookups
/// and thread-id queries are routed through the OpenMP-specific logic.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that are backed by a CapturedStmt (outlined
  /// regions, where captured variables become fields of a context record).
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions that have no associated CapturedStmt
  /// (inlined regions emitted directly into the enclosing function).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a switching point for an untied task; no-op by default,
  /// overridden for task regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// Returns true if the construct (or an enclosing one) may be cancelled,
  /// which forces emission of cancellation checks after barriers etc.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: any captured-region info with kind CR_OpenMP
  /// is (a subclass of) CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Which flavor of OpenMP region this is (see CGOpenMPRegionKind).
  CGOpenMPRegionKind RegionKind;
  /// Callback that emits the actual body of the region.
  RegionCodeGenTy CodeGen;
  /// The OpenMP directive that introduced this region.
  OpenMPDirectiveKind Kind;
  /// Whether a 'cancel' may apply to this region.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the emitted outlined helper function.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// PrePostActionTy that implements the part-id switch used to resume an
  /// untied task at the point where it was previously suspended. Each
  /// suspension point becomes a case of a single SwitchInst keyed on the
  /// task part id.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: inverted from the ctor argument).
    bool Untied;
    /// Variable holding a pointer to the current task part id.
    const VarDecl *PartIDVar;
    /// Extra code to emit at each task switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// The dispatch switch; cases are added lazily, one per part.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 is the initial entry into the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one suspension point: store the next part id, run the
    /// user-supplied switch codegen, return from the task entry, and
    /// register the resume block as a new case of the dispatch switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // The number of existing cases doubles as the next part id.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries are forwarded to the enclosing region info
/// (OuterRegionInfo) when one exists.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  /// The captured-statement info that was active before this inlined
  /// region was entered; restored by InlinedOpenMPRegionRAII.
  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs.
/// For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Unique, client-provided name of the target region helper.
  StringRef HelperName;
};

/// Placeholder codegen callback for regions that must never emit a body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need
      // privatization here.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo on the CodeGenFunction and stashes the
/// lambda-capture and block state so the inlined region does not see it;
/// everything is restored in the destructor.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map of the enclosing function.
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture of the enclosing function.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info of the enclosing function.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the entry points of the libomp/libomptarget runtime;
/// each enumerator's comment gives the C signature of the runtime call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
/// EH-scope cleanup that forwards to the Exit() hook of a PrePostActionTy,
/// so the post-action runs even when the region is left via an exception
/// or branch-through-cleanup.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // If codegen already lost its insert point there is nothing to emit.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Run the stored codegen callback inside its own cleanups scope. When a
/// pre/post action is attached, its Exit() is registered as a cleanup so it
/// fires on every exit path out of the region.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// If \p ReductionOp is a call whose callee is an OpaqueValueExpr wrapping a
/// reference to a user-defined reduction (UDR), return that
/// OMPDeclareReductionDecl; otherwise return nullptr.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit initialization of a reduction private copy from a user-defined
/// reduction.
/// \param DRD The UDR whose initializer clause (if any) is used.
/// \param InitOp The initializer call expression from the UDR.
/// \param Private Address of the private copy being initialized ('omp_priv').
/// \param Original Address of the original variable ('omp_orig').
/// \param Ty Type of the reduction item.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // UDR has an explicit initializer: map omp_priv/omp_orig to the private
    // and original addresses and emit the initializer call.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // The callee OVE is bound to the initializer function of the UDR pair.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No explicit initializer: default-initialize from a private global
    // holding the null constant of the reduction type.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    // Source is only needed when a declare-reduction initializer reads the
    // original (omp_orig) element.
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  // EntryBB must be captured *before* EmitBlock so the PHIs below can record
  // the incoming edge from the loop preheader.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope so per-element temporaries are cleaned up inside the loop body.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name "omp.arraycpy.dest.element" is reused
    // here for the *source* GEP — cosmetic only, does not affect codegen.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back-edge incoming value uses the *current* insert block, which may
  // differ from BodyBB if the element init emitted extra blocks.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit an lvalue for the shared (original) reduction variable.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit an lvalue for the upper bound of an array section; returns an
/// invalid LValue for non-section expressions.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR initializer when one exists, or when the private decl has no
  // default init of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ?
                                                ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  // Zip the three parallel lists into per-clause records.
  // NOTE(review): assumes Privates and ReductionOps are at least as long as
  // Shareds — confirm at call sites.
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  // Lvalues must be generated in order, one per clause.
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: size is known from the type; no element count.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  // VLA-typed item: compute both the size in chars and the element count.
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Section length = UB - LB + 1 (bounds emitted in emitSharedLValue).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed count so the variably
  // modified type can be (re)emitted in this function.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Re-bind the VLA size to the caller-provided element count (e.g. loaded
  // from a task reduction descriptor) and re-emit the type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  // Normalize both addresses to the memory representation of their types.
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays: element-by-element init (UDR or default).
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a declare-reduction initializer.
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private decl's own initializer if the caller's
    // DefaultInit callback did not handle it.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Returns true if the private copy's type requires destruction.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  // needCleanups(N) recomputes the same DestructionKind; DTorKind above is
  // what actually feeds pushDestroy.
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Dereference pointer/reference layers of \p BaseLV until its type matches
/// \p ElTy, then return an lvalue bitcast to ElTy's memory type.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      // Reference layer: load through it.
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Rebuild the pointer/reference indirection chain of \p BaseTy around the
/// adjusted private pointer \p Addr, materializing one temporary per
/// indirection level so the result can be used where the original base was.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // Chain the temporaries: each outer temp stores the address of the next.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted pointer in the innermost temp; return the outermost.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Strip array sections/subscripts off \p Ref to find the underlying
/// VarDecl. \p DE is set only when a base DeclRefExpr is found (i.e. when a
/// non-null decl is returned).
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  // DE is uninitialized here but is only read inside the 'if', where
  // getBaseDecl is guaranteed to have set it.
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset of the section/element within the original base, applied to the
    // private copy so private addressing mirrors shared addressing.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// True if clause N refers to a UDR with an explicit initializer.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue
CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  // The thread-id variable is passed as kmp_int32*; load through it.
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  // For tasks the thread id is passed by value (kmp_int32), not by pointer.
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Append a public, non-mutable field of type \p FieldTy to record \p DC.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  // Build the implicit ident_t record used for source-location arguments to
  // the libomp entry points. Field order mirrors the runtime's ident_t —
  // confirm against openmp/runtime/src/kmp.h when changing.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Only erase unused declarations; leave definitions and used decls.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts using FirstSeparator before the first part and Separator
/// between the rest (e.g. ".omp_combiner." style mangled names).
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str();
}

/// Emit the outlined combiner or initializer function for a UDR.
/// \p CombinerInitializer is the expression to emit (may be null for
/// initializers that only default-initialize omp_priv).
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny forwarding bodies; force-inline when optimizing.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For initializers, run omp_priv's own default init first (if non-trivial).
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Each declare-reduction decl is emitted at most once per module.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only CallInit-style initializers emit the init expression; direct-init
    // relies on omp_priv's own initializer inside the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Track per-function UDRs so functionFinished() can drop them.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  // Emit on demand, then return the cached pair.
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

/// Common helper for 'parallel' and 'teams' outlining: computes HasCancel
/// from the directive kind and generates the captured-statement function.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks, re-enqueue the task via __kmpc_omp_task after each
  // part so it can be rescheduled.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF,
D.getBeginLoc()); 1439 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1440 llvm::Value *TaskArgs[] = { 1441 UpLoc, ThreadID, 1442 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1443 TaskTVar->getType()->castAs<PointerType>()) 1444 .getPointer()}; 1445 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1446 }; 1447 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1448 UntiedCodeGen); 1449 CodeGen.setAction(Action); 1450 assert(!ThreadIDVar->getType()->isPointerType() && 1451 "thread id variable must be of type kmp_int32 for tasks"); 1452 const OpenMPDirectiveKind Region = 1453 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1454 : OMPD_task; 1455 const CapturedStmt *CS = D.getCapturedStmt(Region); 1456 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1457 CodeGenFunction CGF(CGM, true); 1458 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1459 InnermostKind, 1460 TD ? TD->hasCancel() : false, Action); 1461 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1462 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1463 if (!Tied) 1464 NumberOfParts = Action.getNumberOfParts(); 1465 return Res; 1466 } 1467 1468 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1469 const RecordDecl *RD, const CGRecordLayout &RL, 1470 ArrayRef<llvm::Constant *> Data) { 1471 llvm::StructType *StructTy = RL.getLLVMType(); 1472 unsigned PrevIdx = 0; 1473 ConstantInitBuilder CIBuilder(CGM); 1474 auto DI = Data.begin(); 1475 for (const FieldDecl *FD : RD->fields()) { 1476 unsigned Idx = RL.getLLVMFieldNo(FD); 1477 // Fill the alignment. 1478 for (unsigned I = PrevIdx; I < Idx; ++I) 1479 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1480 PrevIdx = Idx + 1; 1481 Fields.add(*DI); 1482 ++DI; 1483 } 1484 } 1485 1486 template <class... 
          As>
/// Build a global variable of record type \p Ty initialized from \p Data
/// (via buildStructValue); extra \p Args are forwarded to
/// finishAndCreateGlobal (e.g. linkage).
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Like createGlobalStruct, but nests the struct constant inside an
/// already-open aggregate builder \p Parent instead of creating a global.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  // Default ident_t globals are cached per (flags, reserved_2) pair.
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order must match the ident_t record built in the constructor:
    // reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A dummy no-op bitcast serves as a stable insertion point for runtime
  // service calls (thread id / location setup); erased by
  // clearLocThreadIdInsertPt.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    // Default: right after the allocas, so the values dominate all uses.
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

llvm::Value
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc,
                                     unsigned Flags) {
  // All locations emitted here are KMPC-style ident_t records.
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t contents into the temporary at the service
    // insertion point, so the initialization dominates every later use.  The
    // guard restores the builder's previous insertion point on scope exit.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The ";file;function;line;column;;" string is built once per source
  // location and cached keyed on the location's raw encoding.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

// Return the global thread id for the current function, loading it via an
// outlined region's thread-id parameter when possible and otherwise calling
// __kmpc_global_thread_num.  The result is cached per function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as
        // argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        // Only a load emitted in the entry block is known to dominate all
        // uses, so only then may it be cached for reuse.
        if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insertion point so the thread id is
  // computed once, early in the function; the guard restores the builder's
  // previous position afterwards.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}

// Tear down all per-function OpenMP codegen state once code generation for
// CGF.CurFn is finished: the thread-id/location cache (including its service
// placeholder instruction), and any user-defined reductions (UDR) and
// user-defined mappers (UDM) registered for this function.
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
}

// Pointer type for the runtime's ident_t location records (ident_t*).
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return IdentTy->getPointerTo();
}

// Pointer type of an outlined parallel region ("microtask"); the function
// type is built lazily on first use and cached in Kmpc_MicroTy.
llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,
    // ...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    // Variadic: the captured variables follow the two thread-id parameters.
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

// Return (declaring on first use) the specified OpenMP runtime library
// entry point.  Each case builds the exact function type expected by the
// runtime and declares it through CGM.CreateRuntimeFunction.
llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
  llvm::FunctionCallee RTLFn = nullptr;
  switch (static_cast<OpenMPRTLFunction>(Function)) {
  case OMPRTL__kmpc_fork_call: {
    // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
    // microtask, ...);
    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                                getKmpc_MicroPointerTy()};
    auto *FnTy =
        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
    if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
      if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
        llvm::LLVMContext &Ctx = F->getContext();
        llvm::MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the __kmpc_fork_call:
        // - The callback callee is argument number 2 (microtask).
        // - The first two arguments of the callback callee are unknown (-1).
        // - All variadic arguments to the __kmpc_fork_call are passed to the
        // callback callee.
1736 F->addMetadata( 1737 llvm::LLVMContext::MD_callback, 1738 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1739 2, {-1, -1}, 1740 /* VarArgsArePassed */ true)})); 1741 } 1742 } 1743 break; 1744 } 1745 case OMPRTL__kmpc_global_thread_num: { 1746 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1747 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1748 auto *FnTy = 1749 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1750 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1751 break; 1752 } 1753 case OMPRTL__kmpc_threadprivate_cached: { 1754 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1755 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1756 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1757 CGM.VoidPtrTy, CGM.SizeTy, 1758 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1759 auto *FnTy = 1760 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1761 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1762 break; 1763 } 1764 case OMPRTL__kmpc_critical: { 1765 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1766 // kmp_critical_name *crit); 1767 llvm::Type *TypeParams[] = { 1768 getIdentTyPointerTy(), CGM.Int32Ty, 1769 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1770 auto *FnTy = 1771 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1772 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1773 break; 1774 } 1775 case OMPRTL__kmpc_critical_with_hint: { 1776 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1777 // kmp_critical_name *crit, uintptr_t hint); 1778 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1779 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1780 CGM.IntPtrTy}; 1781 auto *FnTy = 1782 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1783 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1784 break; 1785 } 1786 case OMPRTL__kmpc_threadprivate_register: { 1787 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1788 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1789 // typedef void *(*kmpc_ctor)(void *); 1790 auto *KmpcCtorTy = 1791 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1792 /*isVarArg*/ false)->getPointerTo(); 1793 // typedef void *(*kmpc_cctor)(void *, void *); 1794 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1795 auto *KmpcCopyCtorTy = 1796 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1797 /*isVarArg*/ false) 1798 ->getPointerTo(); 1799 // typedef void (*kmpc_dtor)(void *); 1800 auto *KmpcDtorTy = 1801 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1802 ->getPointerTo(); 1803 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1804 KmpcCopyCtorTy, KmpcDtorTy}; 1805 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1806 /*isVarArg*/ false); 1807 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1808 break; 1809 } 1810 case OMPRTL__kmpc_end_critical: { 1811 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1812 // kmp_critical_name *crit); 1813 llvm::Type *TypeParams[] = { 1814 getIdentTyPointerTy(), CGM.Int32Ty, 1815 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1816 auto *FnTy = 1817 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1818 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1819 break; 1820 } 1821 case OMPRTL__kmpc_cancel_barrier: { 1822 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1823 // global_tid); 1824 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1825 auto *FnTy = 1826 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1827 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1828 break; 1829 } 1830 case 
OMPRTL__kmpc_barrier: { 1831 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1832 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1833 auto *FnTy = 1834 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1835 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1836 break; 1837 } 1838 case OMPRTL__kmpc_for_static_fini: { 1839 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1840 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1841 auto *FnTy = 1842 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1843 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1844 break; 1845 } 1846 case OMPRTL__kmpc_push_num_threads: { 1847 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1848 // kmp_int32 num_threads) 1849 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1850 CGM.Int32Ty}; 1851 auto *FnTy = 1852 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1853 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1854 break; 1855 } 1856 case OMPRTL__kmpc_serialized_parallel: { 1857 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1858 // global_tid); 1859 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1860 auto *FnTy = 1861 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1862 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1863 break; 1864 } 1865 case OMPRTL__kmpc_end_serialized_parallel: { 1866 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1867 // global_tid); 1868 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1869 auto *FnTy = 1870 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1871 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1872 break; 1873 } 1874 case OMPRTL__kmpc_flush: { 1875 // Build void 
__kmpc_flush(ident_t *loc); 1876 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1877 auto *FnTy = 1878 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1879 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1880 break; 1881 } 1882 case OMPRTL__kmpc_master: { 1883 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1884 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1885 auto *FnTy = 1886 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1887 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1888 break; 1889 } 1890 case OMPRTL__kmpc_end_master: { 1891 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1892 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1893 auto *FnTy = 1894 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1895 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1896 break; 1897 } 1898 case OMPRTL__kmpc_omp_taskyield: { 1899 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1900 // int end_part); 1901 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1902 auto *FnTy = 1903 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1904 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1905 break; 1906 } 1907 case OMPRTL__kmpc_single: { 1908 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1909 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1910 auto *FnTy = 1911 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1912 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_end_single: { 1916 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1917 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1918 auto *FnTy = 1919 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1920 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1921 break; 1922 } 1923 case OMPRTL__kmpc_omp_task_alloc: { 1924 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1925 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1926 // kmp_routine_entry_t *task_entry); 1927 assert(KmpRoutineEntryPtrTy != nullptr && 1928 "Type kmp_routine_entry_t must be created."); 1929 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1930 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1931 // Return void * and then cast to particular kmp_task_t type. 1932 auto *FnTy = 1933 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1934 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1935 break; 1936 } 1937 case OMPRTL__kmpc_omp_target_task_alloc: { 1938 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 1939 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1940 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 1941 assert(KmpRoutineEntryPtrTy != nullptr && 1942 "Type kmp_routine_entry_t must be created."); 1943 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1944 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 1945 CGM.Int64Ty}; 1946 // Return void * and then cast to particular kmp_task_t type. 
1947 auto *FnTy = 1948 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1949 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 1950 break; 1951 } 1952 case OMPRTL__kmpc_omp_task: { 1953 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1954 // *new_task); 1955 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1956 CGM.VoidPtrTy}; 1957 auto *FnTy = 1958 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1959 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1960 break; 1961 } 1962 case OMPRTL__kmpc_copyprivate: { 1963 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1964 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1965 // kmp_int32 didit); 1966 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1967 auto *CpyFnTy = 1968 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1969 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1970 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1971 CGM.Int32Ty}; 1972 auto *FnTy = 1973 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1974 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1975 break; 1976 } 1977 case OMPRTL__kmpc_reduce: { 1978 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1979 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1980 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1981 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1982 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1983 /*isVarArg=*/false); 1984 llvm::Type *TypeParams[] = { 1985 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1986 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1987 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1988 auto *FnTy = 1989 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1990 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1991 break; 1992 } 1993 case OMPRTL__kmpc_reduce_nowait: { 1994 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1995 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1996 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1997 // *lck); 1998 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1999 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2000 /*isVarArg=*/false); 2001 llvm::Type *TypeParams[] = { 2002 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2003 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2004 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2005 auto *FnTy = 2006 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2007 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2008 break; 2009 } 2010 case OMPRTL__kmpc_end_reduce: { 2011 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2012 // kmp_critical_name *lck); 2013 llvm::Type *TypeParams[] = { 2014 getIdentTyPointerTy(), CGM.Int32Ty, 2015 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2016 auto *FnTy = 2017 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2018 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2019 break; 2020 } 2021 case OMPRTL__kmpc_end_reduce_nowait: { 2022 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2023 // kmp_critical_name *lck); 2024 llvm::Type *TypeParams[] = { 2025 getIdentTyPointerTy(), CGM.Int32Ty, 2026 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2027 auto *FnTy = 2028 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2029 RTLFn = 2030 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2031 break; 2032 } 2033 case OMPRTL__kmpc_omp_task_begin_if0: { 
2034 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2035 // *new_task); 2036 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2037 CGM.VoidPtrTy}; 2038 auto *FnTy = 2039 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2040 RTLFn = 2041 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2042 break; 2043 } 2044 case OMPRTL__kmpc_omp_task_complete_if0: { 2045 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2046 // *new_task); 2047 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2048 CGM.VoidPtrTy}; 2049 auto *FnTy = 2050 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2051 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2052 /*Name=*/"__kmpc_omp_task_complete_if0"); 2053 break; 2054 } 2055 case OMPRTL__kmpc_ordered: { 2056 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2057 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2058 auto *FnTy = 2059 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2060 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2061 break; 2062 } 2063 case OMPRTL__kmpc_end_ordered: { 2064 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2065 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2066 auto *FnTy = 2067 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2068 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2069 break; 2070 } 2071 case OMPRTL__kmpc_omp_taskwait: { 2072 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2073 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2074 auto *FnTy = 2075 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2076 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2077 break; 2078 } 2079 case OMPRTL__kmpc_taskgroup: { 2080 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2081 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2082 auto *FnTy = 2083 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2084 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2085 break; 2086 } 2087 case OMPRTL__kmpc_end_taskgroup: { 2088 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2089 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2090 auto *FnTy = 2091 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2092 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2093 break; 2094 } 2095 case OMPRTL__kmpc_push_proc_bind: { 2096 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2097 // int proc_bind) 2098 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2099 auto *FnTy = 2100 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2101 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2102 break; 2103 } 2104 case OMPRTL__kmpc_omp_task_with_deps: { 2105 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2106 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2107 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2108 llvm::Type *TypeParams[] = { 2109 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2110 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2111 auto *FnTy = 2112 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2113 RTLFn = 2114 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2115 break; 2116 } 2117 case OMPRTL__kmpc_omp_wait_deps: { 2118 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2119 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2120 // kmp_depend_info_t *noalias_dep_list); 2121 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2122 CGM.Int32Ty, CGM.VoidPtrTy, 2123 
CGM.Int32Ty, CGM.VoidPtrTy}; 2124 auto *FnTy = 2125 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2126 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2127 break; 2128 } 2129 case OMPRTL__kmpc_cancellationpoint: { 2130 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2131 // global_tid, kmp_int32 cncl_kind) 2132 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2133 auto *FnTy = 2134 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2135 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2136 break; 2137 } 2138 case OMPRTL__kmpc_cancel: { 2139 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2140 // kmp_int32 cncl_kind) 2141 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2142 auto *FnTy = 2143 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2144 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2145 break; 2146 } 2147 case OMPRTL__kmpc_push_num_teams: { 2148 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2149 // kmp_int32 num_teams, kmp_int32 num_threads) 2150 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2151 CGM.Int32Ty}; 2152 auto *FnTy = 2153 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2154 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2155 break; 2156 } 2157 case OMPRTL__kmpc_fork_teams: { 2158 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2159 // microtask, ...); 2160 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2161 getKmpc_MicroPointerTy()}; 2162 auto *FnTy = 2163 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2164 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2165 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2166 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2167 llvm::LLVMContext &Ctx = F->getContext(); 2168 llvm::MDBuilder MDB(Ctx); 2169 // Annotate the callback behavior of the __kmpc_fork_teams: 2170 // - The callback callee is argument number 2 (microtask). 2171 // - The first two arguments of the callback callee are unknown (-1). 2172 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2173 // callback callee. 2174 F->addMetadata( 2175 llvm::LLVMContext::MD_callback, 2176 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2177 2, {-1, -1}, 2178 /* VarArgsArePassed */ true)})); 2179 } 2180 } 2181 break; 2182 } 2183 case OMPRTL__kmpc_taskloop: { 2184 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2185 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2186 // sched, kmp_uint64 grainsize, void *task_dup); 2187 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2188 CGM.IntTy, 2189 CGM.VoidPtrTy, 2190 CGM.IntTy, 2191 CGM.Int64Ty->getPointerTo(), 2192 CGM.Int64Ty->getPointerTo(), 2193 CGM.Int64Ty, 2194 CGM.IntTy, 2195 CGM.IntTy, 2196 CGM.Int64Ty, 2197 CGM.VoidPtrTy}; 2198 auto *FnTy = 2199 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2200 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2201 break; 2202 } 2203 case OMPRTL__kmpc_doacross_init: { 2204 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2205 // num_dims, struct kmp_dim *dims); 2206 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2207 CGM.Int32Ty, 2208 CGM.Int32Ty, 2209 CGM.VoidPtrTy}; 2210 auto *FnTy = 2211 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2212 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2213 break; 2214 } 2215 case OMPRTL__kmpc_doacross_fini: { 2216 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2217 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2218 auto *FnTy 
= 2219 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2220 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2221 break; 2222 } 2223 case OMPRTL__kmpc_doacross_post: { 2224 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2225 // *vec); 2226 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2227 CGM.Int64Ty->getPointerTo()}; 2228 auto *FnTy = 2229 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2230 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2231 break; 2232 } 2233 case OMPRTL__kmpc_doacross_wait: { 2234 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2235 // *vec); 2236 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2237 CGM.Int64Ty->getPointerTo()}; 2238 auto *FnTy = 2239 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2240 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2241 break; 2242 } 2243 case OMPRTL__kmpc_task_reduction_init: { 2244 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2245 // *data); 2246 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2247 auto *FnTy = 2248 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2249 RTLFn = 2250 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2251 break; 2252 } 2253 case OMPRTL__kmpc_task_reduction_get_th_data: { 2254 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2255 // *d); 2256 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2257 auto *FnTy = 2258 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2259 RTLFn = CGM.CreateRuntimeFunction( 2260 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2261 break; 2262 } 2263 case OMPRTL__kmpc_alloc: { 2264 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2265 // 
al); omp_allocator_handle_t type is void *. 2266 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2267 auto *FnTy = 2268 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2269 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2270 break; 2271 } 2272 case OMPRTL__kmpc_free: { 2273 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2274 // al); omp_allocator_handle_t type is void *. 2275 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2276 auto *FnTy = 2277 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2278 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2279 break; 2280 } 2281 case OMPRTL__kmpc_push_target_tripcount: { 2282 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2283 // size); 2284 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2285 llvm::FunctionType *FnTy = 2286 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2287 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2288 break; 2289 } 2290 case OMPRTL__tgt_target: { 2291 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2292 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2293 // *arg_types); 2294 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2295 CGM.VoidPtrTy, 2296 CGM.Int32Ty, 2297 CGM.VoidPtrPtrTy, 2298 CGM.VoidPtrPtrTy, 2299 CGM.Int64Ty->getPointerTo(), 2300 CGM.Int64Ty->getPointerTo()}; 2301 auto *FnTy = 2302 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2303 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2304 break; 2305 } 2306 case OMPRTL__tgt_target_nowait: { 2307 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2308 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2309 // int64_t *arg_types); 2310 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2311 CGM.VoidPtrTy, 2312 
CGM.Int32Ty, 2313 CGM.VoidPtrPtrTy, 2314 CGM.VoidPtrPtrTy, 2315 CGM.Int64Ty->getPointerTo(), 2316 CGM.Int64Ty->getPointerTo()}; 2317 auto *FnTy = 2318 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2319 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2320 break; 2321 } 2322 case OMPRTL__tgt_target_teams: { 2323 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2324 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2325 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2326 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2327 CGM.VoidPtrTy, 2328 CGM.Int32Ty, 2329 CGM.VoidPtrPtrTy, 2330 CGM.VoidPtrPtrTy, 2331 CGM.Int64Ty->getPointerTo(), 2332 CGM.Int64Ty->getPointerTo(), 2333 CGM.Int32Ty, 2334 CGM.Int32Ty}; 2335 auto *FnTy = 2336 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2337 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2338 break; 2339 } 2340 case OMPRTL__tgt_target_teams_nowait: { 2341 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2342 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2343 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2344 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2345 CGM.VoidPtrTy, 2346 CGM.Int32Ty, 2347 CGM.VoidPtrPtrTy, 2348 CGM.VoidPtrPtrTy, 2349 CGM.Int64Ty->getPointerTo(), 2350 CGM.Int64Ty->getPointerTo(), 2351 CGM.Int32Ty, 2352 CGM.Int32Ty}; 2353 auto *FnTy = 2354 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2355 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2356 break; 2357 } 2358 case OMPRTL__tgt_register_requires: { 2359 // Build void __tgt_register_requires(int64_t flags); 2360 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2361 auto *FnTy = 2362 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2363 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2364 
break; 2365 } 2366 case OMPRTL__tgt_register_lib: { 2367 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2368 QualType ParamTy = 2369 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2370 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2371 auto *FnTy = 2372 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2373 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2374 break; 2375 } 2376 case OMPRTL__tgt_unregister_lib: { 2377 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2378 QualType ParamTy = 2379 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2380 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2381 auto *FnTy = 2382 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2383 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2384 break; 2385 } 2386 case OMPRTL__tgt_target_data_begin: { 2387 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2388 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2389 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2390 CGM.Int32Ty, 2391 CGM.VoidPtrPtrTy, 2392 CGM.VoidPtrPtrTy, 2393 CGM.Int64Ty->getPointerTo(), 2394 CGM.Int64Ty->getPointerTo()}; 2395 auto *FnTy = 2396 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2397 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2398 break; 2399 } 2400 case OMPRTL__tgt_target_data_begin_nowait: { 2401 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2402 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2403 // *arg_types); 2404 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2405 CGM.Int32Ty, 2406 CGM.VoidPtrPtrTy, 2407 CGM.VoidPtrPtrTy, 2408 CGM.Int64Ty->getPointerTo(), 2409 CGM.Int64Ty->getPointerTo()}; 2410 auto *FnTy = 2411 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2412 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2413 break; 2414 } 2415 case OMPRTL__tgt_target_data_end: { 2416 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2417 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2418 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2419 CGM.Int32Ty, 2420 CGM.VoidPtrPtrTy, 2421 CGM.VoidPtrPtrTy, 2422 CGM.Int64Ty->getPointerTo(), 2423 CGM.Int64Ty->getPointerTo()}; 2424 auto *FnTy = 2425 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2426 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2427 break; 2428 } 2429 case OMPRTL__tgt_target_data_end_nowait: { 2430 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2431 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2432 // *arg_types); 2433 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2434 CGM.Int32Ty, 2435 CGM.VoidPtrPtrTy, 2436 CGM.VoidPtrPtrTy, 2437 CGM.Int64Ty->getPointerTo(), 2438 CGM.Int64Ty->getPointerTo()}; 2439 auto *FnTy = 2440 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2441 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2442 break; 2443 } 2444 case OMPRTL__tgt_target_data_update: { 2445 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2446 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2447 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2448 CGM.Int32Ty, 2449 CGM.VoidPtrPtrTy, 2450 CGM.VoidPtrPtrTy, 2451 CGM.Int64Ty->getPointerTo(), 2452 CGM.Int64Ty->getPointerTo()}; 2453 auto *FnTy = 2454 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2455 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2456 break; 2457 } 2458 case OMPRTL__tgt_target_data_update_nowait: { 2459 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2460 // arg_num, void** args_base, void **args, int64_t *arg_sizes, 
int64_t 2461 // *arg_types); 2462 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2463 CGM.Int32Ty, 2464 CGM.VoidPtrPtrTy, 2465 CGM.VoidPtrPtrTy, 2466 CGM.Int64Ty->getPointerTo(), 2467 CGM.Int64Ty->getPointerTo()}; 2468 auto *FnTy = 2469 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2470 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2471 break; 2472 } 2473 case OMPRTL__tgt_mapper_num_components: { 2474 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2475 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2476 auto *FnTy = 2477 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2478 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2479 break; 2480 } 2481 case OMPRTL__tgt_push_mapper_component: { 2482 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2483 // *base, void *begin, int64_t size, int64_t type); 2484 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2485 CGM.Int64Ty, CGM.Int64Ty}; 2486 auto *FnTy = 2487 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2488 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2489 break; 2490 } 2491 } 2492 assert(RTLFn && "Unable to find OpenMP runtime function"); 2493 return RTLFn; 2494 } 2495 2496 llvm::FunctionCallee 2497 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2498 assert((IVSize == 32 || IVSize == 64) && 2499 "IV size is not compatible with the omp runtime"); 2500 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2501 : "__kmpc_for_static_init_4u") 2502 : (IVSigned ? "__kmpc_for_static_init_8" 2503 : "__kmpc_for_static_init_8u"); 2504 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2505 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2506 llvm::Type *TypeParams[] = { 2507 getIdentTyPointerTy(), // loc 2508 CGM.Int32Ty, // tid 2509 CGM.Int32Ty, // schedtype 2510 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2511 PtrTy, // p_lower 2512 PtrTy, // p_upper 2513 PtrTy, // p_stride 2514 ITy, // incr 2515 ITy // chunk 2516 }; 2517 auto *FnTy = 2518 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2519 return CGM.CreateRuntimeFunction(FnTy, Name); 2520 } 2521 2522 llvm::FunctionCallee 2523 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2524 assert((IVSize == 32 || IVSize == 64) && 2525 "IV size is not compatible with the omp runtime"); 2526 StringRef Name = 2527 IVSize == 32 2528 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2529 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2530 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2531 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2532 CGM.Int32Ty, // tid 2533 CGM.Int32Ty, // schedtype 2534 ITy, // lower 2535 ITy, // upper 2536 ITy, // stride 2537 ITy // chunk 2538 }; 2539 auto *FnTy = 2540 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2541 return CGM.CreateRuntimeFunction(FnTy, Name); 2542 } 2543 2544 llvm::FunctionCallee 2545 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2546 assert((IVSize == 32 || IVSize == 64) && 2547 "IV size is not compatible with the omp runtime"); 2548 StringRef Name = 2549 IVSize == 32 2550 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2551 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2552 llvm::Type *TypeParams[] = { 2553 getIdentTyPointerTy(), // loc 2554 CGM.Int32Ty, // tid 2555 }; 2556 auto *FnTy = 2557 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2558 return CGM.CreateRuntimeFunction(FnTy, Name); 2559 } 2560 2561 llvm::FunctionCallee 2562 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2563 assert((IVSize == 32 || IVSize == 64) && 2564 "IV size is not compatible with the omp runtime"); 2565 StringRef Name = 2566 IVSize == 32 2567 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2568 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2569 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2570 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2571 llvm::Type *TypeParams[] = { 2572 getIdentTyPointerTy(), // loc 2573 CGM.Int32Ty, // tid 2574 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2575 PtrTy, // p_lower 2576 PtrTy, // p_upper 2577 PtrTy // p_stride 2578 }; 2579 auto *FnTy = 2580 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2581 return CGM.CreateRuntimeFunction(FnTy, Name); 2582 } 2583 2584 /// Obtain information that uniquely identifies a target entry. This 2585 /// consists of the file and device IDs as well as line number associated with 2586 /// the relevant entry source location. 
2587 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2588 unsigned &DeviceID, unsigned &FileID, 2589 unsigned &LineNum) { 2590 SourceManager &SM = C.getSourceManager(); 2591 2592 // The loc should be always valid and have a file ID (the user cannot use 2593 // #pragma directives in macros) 2594 2595 assert(Loc.isValid() && "Source location is expected to be always valid."); 2596 2597 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2598 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2599 2600 llvm::sys::fs::UniqueID ID; 2601 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2602 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2603 << PLoc.getFilename() << EC.message(); 2604 2605 DeviceID = ID.getDevice(); 2606 FileID = ID.getFile(); 2607 LineNum = PLoc.getLine(); 2608 } 2609 2610 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2611 if (CGM.getLangOpts().OpenMPSimd) 2612 return Address::invalid(); 2613 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2614 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2615 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2616 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2617 HasRequiresUnifiedSharedMemory))) { 2618 SmallString<64> PtrName; 2619 { 2620 llvm::raw_svector_ostream OS(PtrName); 2621 OS << CGM.getMangledName(GlobalDecl(VD)); 2622 if (!VD->isExternallyVisible()) { 2623 unsigned DeviceID, FileID, Line; 2624 getTargetEntryUniqueInfo(CGM.getContext(), 2625 VD->getCanonicalDecl()->getBeginLoc(), 2626 DeviceID, FileID, Line); 2627 OS << llvm::format("_%x", FileID); 2628 } 2629 OS << "_decl_tgt_ref_ptr"; 2630 } 2631 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2632 if (!Ptr) { 2633 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2634 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2635 PtrName); 2636 2637 auto *GV = cast<llvm::GlobalVariable>(Ptr); 
2638 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2639 2640 if (!CGM.getLangOpts().OpenMPIsDevice) 2641 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2642 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2643 } 2644 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2645 } 2646 return Address::invalid(); 2647 } 2648 2649 llvm::Constant * 2650 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2651 assert(!CGM.getLangOpts().OpenMPUseTLS || 2652 !CGM.getContext().getTargetInfo().isTLSSupported()); 2653 // Lookup the entry, lazily creating it if necessary. 2654 std::string Suffix = getName({"cache", ""}); 2655 return getOrCreateInternalVariable( 2656 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2657 } 2658 2659 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2660 const VarDecl *VD, 2661 Address VDAddr, 2662 SourceLocation Loc) { 2663 if (CGM.getLangOpts().OpenMPUseTLS && 2664 CGM.getContext().getTargetInfo().isTLSSupported()) 2665 return VDAddr; 2666 2667 llvm::Type *VarTy = VDAddr.getElementType(); 2668 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2669 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2670 CGM.Int8PtrTy), 2671 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2672 getOrCreateThreadPrivateCache(VD)}; 2673 return Address(CGF.EmitRuntimeCall( 2674 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2675 VDAddr.getAlignment()); 2676 } 2677 2678 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2679 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2680 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2681 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2682 // library. 
2683 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2684 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2685 OMPLoc); 2686 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2687 // to register constructor/destructor for variable. 2688 llvm::Value *Args[] = { 2689 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2690 Ctor, CopyCtor, Dtor}; 2691 CGF.EmitRuntimeCall( 2692 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2693 } 2694 2695 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2696 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2697 bool PerformInit, CodeGenFunction *CGF) { 2698 if (CGM.getLangOpts().OpenMPUseTLS && 2699 CGM.getContext().getTargetInfo().isTLSSupported()) 2700 return nullptr; 2701 2702 VD = VD->getDefinition(CGM.getContext()); 2703 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2704 QualType ASTTy = VD->getType(); 2705 2706 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2707 const Expr *Init = VD->getAnyInitializer(); 2708 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2709 // Generate function that re-emits the declaration's initializer into the 2710 // threadprivate copy of the variable VD 2711 CodeGenFunction CtorCGF(CGM); 2712 FunctionArgList Args; 2713 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2714 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2715 ImplicitParamDecl::Other); 2716 Args.push_back(&Dst); 2717 2718 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2719 CGM.getContext().VoidPtrTy, Args); 2720 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2721 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2722 llvm::Function *Fn = 2723 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2724 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2725 Args, Loc, Loc); 2726 
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2727 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2728 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2729 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2730 Arg = CtorCGF.Builder.CreateElementBitCast( 2731 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2732 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2733 /*IsInitializer=*/true); 2734 ArgVal = CtorCGF.EmitLoadOfScalar( 2735 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2736 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2737 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2738 CtorCGF.FinishFunction(); 2739 Ctor = Fn; 2740 } 2741 if (VD->getType().isDestructedType() != QualType::DK_none) { 2742 // Generate function that emits destructor call for the threadprivate copy 2743 // of the variable VD 2744 CodeGenFunction DtorCGF(CGM); 2745 FunctionArgList Args; 2746 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2747 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2748 ImplicitParamDecl::Other); 2749 Args.push_back(&Dst); 2750 2751 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2752 CGM.getContext().VoidTy, Args); 2753 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2754 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2755 llvm::Function *Fn = 2756 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2757 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2758 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2759 Loc, Loc); 2760 // Create a scope with an artificial location for the body of this function. 
2761 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2762 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2763 DtorCGF.GetAddrOfLocalVar(&Dst), 2764 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2765 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2766 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2767 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2768 DtorCGF.FinishFunction(); 2769 Dtor = Fn; 2770 } 2771 // Do not emit init function if it is not required. 2772 if (!Ctor && !Dtor) 2773 return nullptr; 2774 2775 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2776 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2777 /*isVarArg=*/false) 2778 ->getPointerTo(); 2779 // Copying constructor for the threadprivate variable. 2780 // Must be NULL - reserved by runtime, but currently it requires that this 2781 // parameter is always NULL. Otherwise it fires assertion. 2782 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2783 if (Ctor == nullptr) { 2784 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2785 /*isVarArg=*/false) 2786 ->getPointerTo(); 2787 Ctor = llvm::Constant::getNullValue(CtorTy); 2788 } 2789 if (Dtor == nullptr) { 2790 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2791 /*isVarArg=*/false) 2792 ->getPointerTo(); 2793 Dtor = llvm::Constant::getNullValue(DtorTy); 2794 } 2795 if (!CGF) { 2796 auto *InitFunctionTy = 2797 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2798 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2799 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2800 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2801 CodeGenFunction InitCGF(CGM); 2802 FunctionArgList ArgList; 2803 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2804 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2805 Loc, Loc); 2806 
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2807 InitCGF.FinishFunction(); 2808 return InitFunction; 2809 } 2810 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2811 } 2812 return nullptr; 2813 } 2814 2815 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2816 llvm::GlobalVariable *Addr, 2817 bool PerformInit) { 2818 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2819 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2820 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2821 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2822 HasRequiresUnifiedSharedMemory)) 2823 return CGM.getLangOpts().OpenMPIsDevice; 2824 VD = VD->getDefinition(CGM.getContext()); 2825 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2826 return CGM.getLangOpts().OpenMPIsDevice; 2827 2828 QualType ASTTy = VD->getType(); 2829 2830 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2831 // Produce the unique prefix to identify the new target regions. We use 2832 // the source location of the variable declaration which we know to not 2833 // conflict with any target region. 
2834 unsigned DeviceID; 2835 unsigned FileID; 2836 unsigned Line; 2837 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2838 SmallString<128> Buffer, Out; 2839 { 2840 llvm::raw_svector_ostream OS(Buffer); 2841 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2842 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2843 } 2844 2845 const Expr *Init = VD->getAnyInitializer(); 2846 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2847 llvm::Constant *Ctor; 2848 llvm::Constant *ID; 2849 if (CGM.getLangOpts().OpenMPIsDevice) { 2850 // Generate function that re-emits the declaration's initializer into 2851 // the threadprivate copy of the variable VD 2852 CodeGenFunction CtorCGF(CGM); 2853 2854 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2855 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2856 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2857 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2858 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2859 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2860 FunctionArgList(), Loc, Loc); 2861 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2862 CtorCGF.EmitAnyExprToMem(Init, 2863 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2864 Init->getType().getQualifiers(), 2865 /*IsInitializer=*/true); 2866 CtorCGF.FinishFunction(); 2867 Ctor = Fn; 2868 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2869 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2870 } else { 2871 Ctor = new llvm::GlobalVariable( 2872 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2873 llvm::GlobalValue::PrivateLinkage, 2874 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2875 ID = Ctor; 2876 } 2877 2878 // Register the information for the entry associated with the constructor. 
2879 Out.clear(); 2880 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2881 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2882 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2883 } 2884 if (VD->getType().isDestructedType() != QualType::DK_none) { 2885 llvm::Constant *Dtor; 2886 llvm::Constant *ID; 2887 if (CGM.getLangOpts().OpenMPIsDevice) { 2888 // Generate function that emits destructor call for the threadprivate 2889 // copy of the variable VD 2890 CodeGenFunction DtorCGF(CGM); 2891 2892 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2893 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2894 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2895 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2896 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2897 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2898 FunctionArgList(), Loc, Loc); 2899 // Create a scope with an artificial location for the body of this 2900 // function. 2901 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2902 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2903 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2904 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2905 DtorCGF.FinishFunction(); 2906 Dtor = Fn; 2907 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2908 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2909 } else { 2910 Dtor = new llvm::GlobalVariable( 2911 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2912 llvm::GlobalValue::PrivateLinkage, 2913 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2914 ID = Dtor; 2915 } 2916 // Register the information for the entry associated with the destructor. 
2917 Out.clear(); 2918 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2919 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2920 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2921 } 2922 return CGM.getLangOpts().OpenMPIsDevice; 2923 } 2924 2925 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2926 QualType VarType, 2927 StringRef Name) { 2928 std::string Suffix = getName({"artificial", ""}); 2929 std::string CacheSuffix = getName({"cache", ""}); 2930 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2931 llvm::Value *GAddr = 2932 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2933 llvm::Value *Args[] = { 2934 emitUpdateLocation(CGF, SourceLocation()), 2935 getThreadID(CGF, SourceLocation()), 2936 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2937 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2938 /*isSigned=*/false), 2939 getOrCreateInternalVariable( 2940 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2941 return Address( 2942 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2943 CGF.EmitRuntimeCall( 2944 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2945 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2946 CGM.getPointerAlign()); 2947 } 2948 2949 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2950 const RegionCodeGenTy &ThenGen, 2951 const RegionCodeGenTy &ElseGen) { 2952 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2953 2954 // If the condition constant folds and can be elided, try to avoid emitting 2955 // the condition and the dead arm of the if/else. 2956 bool CondConstant; 2957 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2958 if (CondConstant) 2959 ThenGen(CGF); 2960 else 2961 ElseGen(CGF); 2962 return; 2963 } 2964 2965 // Otherwise, the condition did not fold, or we couldn't elide it. 
Just 2966 // emit the conditional branch. 2967 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2968 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2969 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2970 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2971 2972 // Emit the 'then' code. 2973 CGF.EmitBlock(ThenBlock); 2974 ThenGen(CGF); 2975 CGF.EmitBranch(ContBlock); 2976 // Emit the 'else' code if present. 2977 // There is no need to emit line number for unconditional branch. 2978 (void)ApplyDebugLocation::CreateEmpty(CGF); 2979 CGF.EmitBlock(ElseBlock); 2980 ElseGen(CGF); 2981 // There is no need to emit line number for unconditional branch. 2982 (void)ApplyDebugLocation::CreateEmpty(CGF); 2983 CGF.EmitBranch(ContBlock); 2984 // Emit the continuation block for code after the if. 2985 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2986 } 2987 2988 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2989 llvm::Function *OutlinedFn, 2990 ArrayRef<llvm::Value *> CapturedVars, 2991 const Expr *IfCond) { 2992 if (!CGF.HaveInsertPoint()) 2993 return; 2994 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2995 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 2996 PrePostActionTy &) { 2997 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2998 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2999 llvm::Value *Args[] = { 3000 RTLoc, 3001 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3002 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3003 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3004 RealArgs.append(std::begin(Args), std::end(Args)); 3005 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3006 3007 llvm::FunctionCallee RTLFn = 3008 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3009 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3010 }; 3011 auto &&ElseGen = [OutlinedFn, 
CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3012 PrePostActionTy &) { 3013 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3014 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3015 // Build calls: 3016 // __kmpc_serialized_parallel(&Loc, GTid); 3017 llvm::Value *Args[] = {RTLoc, ThreadID}; 3018 CGF.EmitRuntimeCall( 3019 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3020 3021 // OutlinedFn(>id, &zero, CapturedStruct); 3022 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3023 /*Name*/ ".zero.addr"); 3024 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 3025 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3026 // ThreadId for serialized parallels is 0. 3027 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 3028 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 3029 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3030 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3031 3032 // __kmpc_end_serialized_parallel(&Loc, GTid); 3033 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3034 CGF.EmitRuntimeCall( 3035 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3036 EndArgs); 3037 }; 3038 if (IfCond) { 3039 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 3040 } else { 3041 RegionCodeGenTy ThenRCG(ThenGen); 3042 ThenRCG(CGF); 3043 } 3044 } 3045 3046 // If we're inside an (outlined) parallel region, use the region info's 3047 // thread-ID variable (it is passed in a first argument of the outlined function 3048 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3049 // regular serial code region, get thread ID by calling kmp_int32 3050 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3051 // return the address of that temp. 
3052 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3053 SourceLocation Loc) { 3054 if (auto *OMPRegionInfo = 3055 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3056 if (OMPRegionInfo->getThreadIDVariable()) 3057 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 3058 3059 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3060 QualType Int32Ty = 3061 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3062 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3063 CGF.EmitStoreOfScalar(ThreadID, 3064 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3065 3066 return ThreadIDTemp; 3067 } 3068 3069 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3070 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3071 SmallString<256> Buffer; 3072 llvm::raw_svector_ostream Out(Buffer); 3073 Out << Name; 3074 StringRef RuntimeName = Out.str(); 3075 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3076 if (Elem.second) { 3077 assert(Elem.second->getType()->getPointerElementType() == Ty && 3078 "OMP internal variable has different type than requested"); 3079 return &*Elem.second; 3080 } 3081 3082 return Elem.second = new llvm::GlobalVariable( 3083 CGM.getModule(), Ty, /*IsConstant*/ false, 3084 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3085 Elem.first(), /*InsertBefore=*/nullptr, 3086 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3087 } 3088 3089 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3090 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3091 std::string Name = getName({Prefix, "var"}); 3092 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3093 } 3094 3095 namespace { 3096 /// Common pre(post)-action for different OpenMP constructs. 
class CommonActionTy final : public PrePostActionTy {
  /// Runtime function invoked on region entry (e.g. __kmpc_critical).
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  /// Runtime function invoked on region exit (e.g. __kmpc_end_critical).
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  /// If true, the region body is only emitted when the enter call returns a
  /// non-zero value (used for 'master'/'single').
  bool Conditional;
  /// Continuation block for the conditional form; set in Enter(), consumed
  /// in Done().
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Finish the conditional form: branch to and emit the continuation block.
  /// Only meaningful (and only called) when Conditional is true.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit an OpenMP 'critical' region protected by the named runtime lock,
/// optionally passing a 'hint' value to the runtime.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  // The enter call takes the same args as the exit call, plus the hint.
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit an OpenMP 'master' region: the body runs only in the thread for
/// which __kmpc_master returns non-zero.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Close the conditional region opened by Action.Enter().
  Action.Done(CGF);
}

/// Emit a call implementing the 'taskyield' directive.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  // A taskyield is a scheduling point for untied tasks.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit an OpenMP 'taskgroup' region bracketed by runtime calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the void* element as a pointer to the variable's memory type.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Build the outlined function that the runtime calls to copy 'copyprivate'
/// values from the single-executing thread to every other thread. The
/// function receives two void* arguments, each actually a pointer to an
/// array of void* slots (one per copyprivate variable), and performs the
/// per-variable assignment expressions from AssignmentOps.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit an OpenMP 'single' region, including the 'copyprivate' broadcast of
/// values from the executing thread to the rest of the team when
/// CopyprivateVars is non-empty.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    // This store is still inside the conditional region, so only the thread
    // that executed the single body records did_it = 1.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// Emit an 'ordered' region. Only the 'threads' form needs the runtime
/// bracket calls; otherwise the body is emitted inline as-is.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the ident_t flags describing which implicit or
/// explicit barrier is being emitted.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Choose the default schedule (and chunk expression) for a loop directive.
/// \param ScheduleKind [out] Selected schedule kind.
/// \param ChunkExpr [out] Chunk expression, or left unchanged if none.
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier at the given location. Inside a cancellable region (unless
/// ForceSimpleCall), the cancellation-aware __kmpc_cancel_barrier is used and
/// (if EmitChecks) its result is tested to exit the construct on cancel.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
3451 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3452 bool Chunked, bool Ordered) { 3453 switch (ScheduleKind) { 3454 case OMPC_SCHEDULE_static: 3455 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3456 : (Ordered ? OMP_ord_static : OMP_sch_static); 3457 case OMPC_SCHEDULE_dynamic: 3458 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3459 case OMPC_SCHEDULE_guided: 3460 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3461 case OMPC_SCHEDULE_runtime: 3462 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3463 case OMPC_SCHEDULE_auto: 3464 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3465 case OMPC_SCHEDULE_unknown: 3466 assert(!Chunked && "chunk was specified but schedule kind not known"); 3467 return Ordered ? OMP_ord_static : OMP_sch_static; 3468 } 3469 llvm_unreachable("Unexpected runtime schedule"); 3470 } 3471 3472 /// Map the OpenMP distribute schedule to the runtime enumeration. 3473 static OpenMPSchedType 3474 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3475 // only static is allowed for dist_schedule 3476 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3477 } 3478 3479 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3480 bool Chunked) const { 3481 OpenMPSchedType Schedule = 3482 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3483 return Schedule == OMP_sch_static; 3484 } 3485 3486 bool CGOpenMPRuntime::isStaticNonchunked( 3487 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3488 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3489 return Schedule == OMP_dist_sch_static; 3490 } 3491 3492 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3493 bool Chunked) const { 3494 OpenMPSchedType Schedule = 3495 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3496 return Schedule == OMP_sch_static_chunked; 3497 } 3498 3499 bool CGOpenMPRuntime::isStaticChunked( 3500 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3501 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3502 return Schedule == OMP_dist_sch_static_chunked; 3503 } 3504 3505 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3506 OpenMPSchedType Schedule = 3507 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3508 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3509 return Schedule != OMP_sch_static; 3510 } 3511 3512 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 3513 OpenMPScheduleClauseModifier M1, 3514 OpenMPScheduleClauseModifier M2) { 3515 int Modifier = 0; 3516 switch (M1) { 3517 case OMPC_SCHEDULE_MODIFIER_monotonic: 3518 Modifier = OMP_sch_modifier_monotonic; 3519 break; 3520 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3521 Modifier = OMP_sch_modifier_nonmonotonic; 3522 break; 3523 case OMPC_SCHEDULE_MODIFIER_simd: 3524 if (Schedule == OMP_sch_static_chunked) 3525 Schedule = OMP_sch_static_balanced_chunked; 3526 break; 3527 case 
OMPC_SCHEDULE_MODIFIER_last: 3528 case OMPC_SCHEDULE_MODIFIER_unknown: 3529 break; 3530 } 3531 switch (M2) { 3532 case OMPC_SCHEDULE_MODIFIER_monotonic: 3533 Modifier = OMP_sch_modifier_monotonic; 3534 break; 3535 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3536 Modifier = OMP_sch_modifier_nonmonotonic; 3537 break; 3538 case OMPC_SCHEDULE_MODIFIER_simd: 3539 if (Schedule == OMP_sch_static_chunked) 3540 Schedule = OMP_sch_static_balanced_chunked; 3541 break; 3542 case OMPC_SCHEDULE_MODIFIER_last: 3543 case OMPC_SCHEDULE_MODIFIER_unknown: 3544 break; 3545 } 3546 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 3547 // If the static schedule kind is specified or if the ordered clause is 3548 // specified, and if the nonmonotonic modifier is not specified, the effect is 3549 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 3550 // modifier is specified, the effect is as if the nonmonotonic modifier is 3551 // specified. 3552 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 3553 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 3554 Schedule == OMP_sch_static_balanced_chunked || 3555 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static)) 3556 Modifier = OMP_sch_modifier_nonmonotonic; 3557 } 3558 return Schedule | Modifier; 3559 } 3560 3561 void CGOpenMPRuntime::emitForDispatchInit( 3562 CodeGenFunction &CGF, SourceLocation Loc, 3563 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3564 bool Ordered, const DispatchRTInput &DispatchValues) { 3565 if (!CGF.HaveInsertPoint()) 3566 return; 3567 OpenMPSchedType Schedule = getRuntimeSchedule( 3568 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3569 assert(Ordered || 3570 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3571 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3572 Schedule != OMP_sch_static_balanced_chunked)); 3573 // Call __kmpc_dispatch_init( 3574 // 
ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3575 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3576 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3577 3578 // If the Chunk was not specified in the clause - use default value 1. 3579 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 3580 : CGF.Builder.getIntN(IVSize, 1); 3581 llvm::Value *Args[] = { 3582 emitUpdateLocation(CGF, Loc), 3583 getThreadID(CGF, Loc), 3584 CGF.Builder.getInt32(addMonoNonMonoModifier( 3585 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3586 DispatchValues.LB, // Lower 3587 DispatchValues.UB, // Upper 3588 CGF.Builder.getIntN(IVSize, 1), // Stride 3589 Chunk // Chunk 3590 }; 3591 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3592 } 3593 3594 static void emitForStaticInitCall( 3595 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3596 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3597 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3598 const CGOpenMPRuntime::StaticRTInput &Values) { 3599 if (!CGF.HaveInsertPoint()) 3600 return; 3601 3602 assert(!Values.Ordered); 3603 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3604 Schedule == OMP_sch_static_balanced_chunked || 3605 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3606 Schedule == OMP_dist_sch_static || 3607 Schedule == OMP_dist_sch_static_chunked); 3608 3609 // Call __kmpc_for_static_init( 3610 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3611 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3612 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3613 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3614 llvm::Value *Chunk = Values.Chunk; 3615 if (Chunk == nullptr) { 3616 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3617 Schedule == OMP_dist_sch_static) && 3618 "expected static non-chunked schedule"); 3619 // If the Chunk 
was not specified in the clause - use default value 1. 3620 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3621 } else { 3622 assert((Schedule == OMP_sch_static_chunked || 3623 Schedule == OMP_sch_static_balanced_chunked || 3624 Schedule == OMP_ord_static_chunked || 3625 Schedule == OMP_dist_sch_static_chunked) && 3626 "expected static chunked schedule"); 3627 } 3628 llvm::Value *Args[] = { 3629 UpdateLocation, 3630 ThreadId, 3631 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 3632 M2)), // Schedule type 3633 Values.IL.getPointer(), // &isLastIter 3634 Values.LB.getPointer(), // &LB 3635 Values.UB.getPointer(), // &UB 3636 Values.ST.getPointer(), // &Stride 3637 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3638 Chunk // Chunk 3639 }; 3640 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3641 } 3642 3643 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3644 SourceLocation Loc, 3645 OpenMPDirectiveKind DKind, 3646 const OpenMPScheduleTy &ScheduleKind, 3647 const StaticRTInput &Values) { 3648 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3649 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3650 assert(isOpenMPWorksharingDirective(DKind) && 3651 "Expected loop-based or sections-based directive."); 3652 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3653 isOpenMPLoopDirective(DKind) 3654 ? 
OMP_IDENT_WORK_LOOP 3655 : OMP_IDENT_WORK_SECTIONS); 3656 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3657 llvm::FunctionCallee StaticInitFunction = 3658 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3659 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3660 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3661 } 3662 3663 void CGOpenMPRuntime::emitDistributeStaticInit( 3664 CodeGenFunction &CGF, SourceLocation Loc, 3665 OpenMPDistScheduleClauseKind SchedKind, 3666 const CGOpenMPRuntime::StaticRTInput &Values) { 3667 OpenMPSchedType ScheduleNum = 3668 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3669 llvm::Value *UpdatedLocation = 3670 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3671 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3672 llvm::FunctionCallee StaticInitFunction = 3673 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3674 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3675 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3676 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3677 } 3678 3679 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3680 SourceLocation Loc, 3681 OpenMPDirectiveKind DKind) { 3682 if (!CGF.HaveInsertPoint()) 3683 return; 3684 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3685 llvm::Value *Args[] = { 3686 emitUpdateLocation(CGF, Loc, 3687 isOpenMPDistributeDirective(DKind) 3688 ? OMP_IDENT_WORK_DISTRIBUTE 3689 : isOpenMPLoopDirective(DKind) 3690 ? 
OMP_IDENT_WORK_LOOP 3691 : OMP_IDENT_WORK_SECTIONS), 3692 getThreadID(CGF, Loc)}; 3693 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3694 Args); 3695 } 3696 3697 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3698 SourceLocation Loc, 3699 unsigned IVSize, 3700 bool IVSigned) { 3701 if (!CGF.HaveInsertPoint()) 3702 return; 3703 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3704 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3705 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3706 } 3707 3708 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3709 SourceLocation Loc, unsigned IVSize, 3710 bool IVSigned, Address IL, 3711 Address LB, Address UB, 3712 Address ST) { 3713 // Call __kmpc_dispatch_next( 3714 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3715 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3716 // kmp_int[32|64] *p_stride); 3717 llvm::Value *Args[] = { 3718 emitUpdateLocation(CGF, Loc), 3719 getThreadID(CGF, Loc), 3720 IL.getPointer(), // &isLastIter 3721 LB.getPointer(), // &Lower 3722 UB.getPointer(), // &Upper 3723 ST.getPointer() // &Stride 3724 }; 3725 llvm::Value *Call = 3726 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3727 return CGF.EmitScalarConversion( 3728 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3729 CGF.getContext().BoolTy, Loc); 3730 } 3731 3732 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3733 llvm::Value *NumThreads, 3734 SourceLocation Loc) { 3735 if (!CGF.HaveInsertPoint()) 3736 return; 3737 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3738 llvm::Value *Args[] = { 3739 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3740 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3741 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3742 Args); 3743 
} 3744 3745 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3746 OpenMPProcBindClauseKind ProcBind, 3747 SourceLocation Loc) { 3748 if (!CGF.HaveInsertPoint()) 3749 return; 3750 // Constants for proc bind value accepted by the runtime. 3751 enum ProcBindTy { 3752 ProcBindFalse = 0, 3753 ProcBindTrue, 3754 ProcBindMaster, 3755 ProcBindClose, 3756 ProcBindSpread, 3757 ProcBindIntel, 3758 ProcBindDefault 3759 } RuntimeProcBind; 3760 switch (ProcBind) { 3761 case OMPC_PROC_BIND_master: 3762 RuntimeProcBind = ProcBindMaster; 3763 break; 3764 case OMPC_PROC_BIND_close: 3765 RuntimeProcBind = ProcBindClose; 3766 break; 3767 case OMPC_PROC_BIND_spread: 3768 RuntimeProcBind = ProcBindSpread; 3769 break; 3770 case OMPC_PROC_BIND_unknown: 3771 llvm_unreachable("Unsupported proc_bind value."); 3772 } 3773 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3774 llvm::Value *Args[] = { 3775 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3776 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3777 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3778 } 3779 3780 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3781 SourceLocation Loc) { 3782 if (!CGF.HaveInsertPoint()) 3783 return; 3784 // Build call void __kmpc_flush(ident_t *loc) 3785 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3786 emitUpdateLocation(CGF, Loc)); 3787 } 3788 3789 namespace { 3790 /// Indexes of fields for type kmp_task_t. 3791 enum KmpTaskTFields { 3792 /// List of shared variables. 3793 KmpTaskTShareds, 3794 /// Task routine. 3795 KmpTaskTRoutine, 3796 /// Partition id for the untied tasks. 3797 KmpTaskTPartId, 3798 /// Function with call of destructors for private variables. 3799 Data1, 3800 /// Task priority. 3801 Data2, 3802 /// (Taskloops only) Lower bound. 3803 KmpTaskTLowerBound, 3804 /// (Taskloops only) Upper bound. 
3805 KmpTaskTUpperBound, 3806 /// (Taskloops only) Stride. 3807 KmpTaskTStride, 3808 /// (Taskloops only) Is last iteration flag. 3809 KmpTaskTLastIter, 3810 /// (Taskloops only) Reduction data. 3811 KmpTaskTReductions, 3812 }; 3813 } // anonymous namespace 3814 3815 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3816 return OffloadEntriesTargetRegion.empty() && 3817 OffloadEntriesDeviceGlobalVar.empty(); 3818 } 3819 3820 /// Initialize target region entry. 3821 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3822 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3823 StringRef ParentName, unsigned LineNum, 3824 unsigned Order) { 3825 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3826 "only required for the device " 3827 "code generation."); 3828 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3829 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3830 OMPTargetRegionEntryTargetRegion); 3831 ++OffloadingEntriesNum; 3832 } 3833 3834 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3835 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3836 StringRef ParentName, unsigned LineNum, 3837 llvm::Constant *Addr, llvm::Constant *ID, 3838 OMPTargetRegionEntryKind Flags) { 3839 // If we are emitting code for a target, the entry is already initialized, 3840 // only has to be registered. 
3841 if (CGM.getLangOpts().OpenMPIsDevice) { 3842 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3843 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3844 DiagnosticsEngine::Error, 3845 "Unable to find target region on line '%0' in the device code."); 3846 CGM.getDiags().Report(DiagID) << LineNum; 3847 return; 3848 } 3849 auto &Entry = 3850 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3851 assert(Entry.isValid() && "Entry not initialized!"); 3852 Entry.setAddress(Addr); 3853 Entry.setID(ID); 3854 Entry.setFlags(Flags); 3855 } else { 3856 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3857 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3858 ++OffloadingEntriesNum; 3859 } 3860 } 3861 3862 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3863 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3864 unsigned LineNum) const { 3865 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3866 if (PerDevice == OffloadEntriesTargetRegion.end()) 3867 return false; 3868 auto PerFile = PerDevice->second.find(FileID); 3869 if (PerFile == PerDevice->second.end()) 3870 return false; 3871 auto PerParentName = PerFile->second.find(ParentName); 3872 if (PerParentName == PerFile->second.end()) 3873 return false; 3874 auto PerLine = PerParentName->second.find(LineNum); 3875 if (PerLine == PerParentName->second.end()) 3876 return false; 3877 // Fail if this entry is already registered. 3878 if (PerLine->second.getAddress() || PerLine->second.getID()) 3879 return false; 3880 return true; 3881 } 3882 3883 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3884 const OffloadTargetRegionEntryInfoActTy &Action) { 3885 // Scan all target region entries and perform the provided action. 
3886 for (const auto &D : OffloadEntriesTargetRegion) 3887 for (const auto &F : D.second) 3888 for (const auto &P : F.second) 3889 for (const auto &L : P.second) 3890 Action(D.first, F.first, P.first(), L.first, L.second); 3891 } 3892 3893 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3894 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3895 OMPTargetGlobalVarEntryKind Flags, 3896 unsigned Order) { 3897 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3898 "only required for the device " 3899 "code generation."); 3900 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3901 ++OffloadingEntriesNum; 3902 } 3903 3904 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3905 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3906 CharUnits VarSize, 3907 OMPTargetGlobalVarEntryKind Flags, 3908 llvm::GlobalValue::LinkageTypes Linkage) { 3909 if (CGM.getLangOpts().OpenMPIsDevice) { 3910 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3911 assert(Entry.isValid() && Entry.getFlags() == Flags && 3912 "Entry not initialized!"); 3913 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3914 "Resetting with the new address."); 3915 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3916 if (Entry.getVarSize().isZero()) { 3917 Entry.setVarSize(VarSize); 3918 Entry.setLinkage(Linkage); 3919 } 3920 return; 3921 } 3922 Entry.setVarSize(VarSize); 3923 Entry.setLinkage(Linkage); 3924 Entry.setAddress(Addr); 3925 } else { 3926 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3927 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3928 assert(Entry.isValid() && Entry.getFlags() == Flags && 3929 "Entry not initialized!"); 3930 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3931 "Resetting with the new address."); 3932 if (Entry.getVarSize().isZero()) { 3933 Entry.setVarSize(VarSize); 3934 Entry.setLinkage(Linkage); 3935 } 3936 return; 3937 } 3938 
    // First time this variable is seen on the host: create a new entry.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about.
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
  llvm::Type *OffloadEntryTy =
      CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
  auto *HostEntriesBegin = new llvm::GlobalVariable(
      M, OffloadEntryTy, /*isConstant=*/true,
      llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
      EntriesBeginName);
  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
  auto *HostEntriesEnd =
      new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
                               llvm::GlobalValue::ExternalLinkage,
                               /*Initializer=*/nullptr, EntriesEndName);

  // Create all device images.
  auto *DeviceImageTy = cast<llvm::StructType>(
      CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
  ConstantInitBuilder DeviceImagesBuilder(CGM);
  ConstantArrayBuilder DeviceImagesEntries =
      DeviceImagesBuilder.beginArray(DeviceImageTy);

  for (const llvm::Triple &Device : Devices) {
    StringRef T = Device.getTriple();
    // The image start/end symbols are per-target (the triple is appended to
    // the name) and use extern_weak linkage so the link succeeds even when no
    // image is present for a given target.
    std::string BeginName = getName({"omp_offloading", "img_start", ""});
    auto *ImgBegin = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(BeginName).concat(T));
    std::string EndName = getName({"omp_offloading", "img_end", ""});
    auto *ImgEnd = new llvm::GlobalVariable(
        M, CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::ExternalWeakLinkage,
        /*Initializer=*/nullptr, Twine(EndName).concat(T));

    llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
                              HostEntriesEnd};
    createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data,
                                             DeviceImagesEntries);
  }

  // Create device images global array.
  std::string ImagesName = getName({"omp_offloading", "device_images"});
  llvm::GlobalVariable *DeviceImages =
      DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
                                                CGM.getPointerAlign(),
                                                /*isConstant=*/true);
  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // This is a Zero array to be used in the creation of the constant
  // expressions (GEP to element [0][0] of the images array).
  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
                             llvm::Constant::getNullValue(CGM.Int32Ty)};

  // Create the target region descriptor.
  llvm::Constant *Data[] = {
      llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
      llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
                                           DeviceImages, Index),
      HostEntriesBegin, HostEntriesEnd};
  std::string Descriptor = getName({"omp_offloading", "descriptor"});
  llvm::GlobalVariable *Desc = createGlobalStruct(
      CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);

  // Emit code to register or unregister the descriptor at execution
  // startup or closing, respectively.

  llvm::Function *UnRegFn;
  {
    FunctionArgList Args;
    ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
    Args.push_back(&DummyPtr);

    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI =
        CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
    UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
    // The unregister function simply calls __tgt_unregister_lib(Desc).
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
                        Desc);
    CGF.FinishFunction();
  }
  llvm::Function *RegFn;
  {
    CodeGenFunction CGF(CGM);
    // Disable debug info for global (de-)initializer because they are not part
    // of some particular construct.
    CGF.disableDebugInfo();
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);

    // Encode offload target triples into the registration function name. It
    // will serve as a comdat key for the registration/unregistration code for
    // this particular combination of offloading targets.
    SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
    RegFnNameParts[0] = "omp_offloading";
    RegFnNameParts[1] = "descriptor_reg";
    llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
                    [](const llvm::Triple &T) -> const std::string& {
                      return T.getTriple();
                    });
    // Sort the triples so the name is stable regardless of option order.
    llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
    std::string Descriptor = getName(RegFnNameParts);
    RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc);
    // Create a variable to drive the registration and unregistration of the
    // descriptor, so we can reuse the logic that emits Ctors and Dtors.
    ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
                                  SourceLocation(), nullptr, C.CharTy,
                                  ImplicitParamDecl::Other);
    CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
    CGF.FinishFunction();
  }
  if (CGM.supportsCOMDAT()) {
    // It is sufficient to call registration function only once, so create a
    // COMDAT group for registration/unregistration functions and associated
    // data. That would reduce startup time and code size. Registration
    // function serves as a COMDAT group key.
    llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
    RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
    RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
    RegFn->setComdat(ComdatKey);
    UnRegFn->setComdat(ComdatKey);
    DeviceImages->setComdat(ComdatKey);
    Desc->setComdat(ComdatKey);
  }
  return RegFn;
}

void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Initializer for __tgt_offload_entry: {addr, name, size, flags, reserved}.
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  std::string Section = getName({"omp_offloading", "entries"});
  Entry->setSection(Section);
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for function that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries indexed by their creation order, filled in by the emitters below.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        //   identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Emit the actual offload entry globals, in creation order.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // For 'link' entries the address is expected only on the host side.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to read an integer or string operand of the metadata node.
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; the remaining operands follow the layout
    // produced by createOffloadEntriesAndInfoMetadata().
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  // Lazily create the routine entry type; subsequent calls are no-ops.
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void    *addr;     // Pointer to the offload entry info.
  //                      // (function or global)
  //   char    *name;     // Name of the function or global.
  //   size_t   size;     // Size of the entry info (0 if it a function).
  //   int32_t  flags;    // Flags associated with the entry, e.g. 'link'.
  //   int32_t  reserved; // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // Pack the record so its layout matches the runtime's entry table.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  //   void *ImageStart; // Pointer to the target code start.
  //   void *ImageEnd;   // Pointer to the target code end.
  //   // We also add the host entries to the device image, as it may be useful
  //   // for the target runtime to have access to that information.
  //   __tgt_offload_entry *EntriesBegin; // Begin of the table with all
  //                                      // the entries.
  //   __tgt_offload_entry *EntriesEnd;   // End of the table with all the
  //                                      // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t NumDevices;                // Number of devices supported.
  //   __tgt_device_image *DeviceImages;  // Arrays of device images
  //                                      // (one per device).
  //   __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
  //                                      // entries.
  //   __tgt_offload_entry *EntriesEnd;   // End of the table with all the
  //                                      // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// Helper describing one privatized variable of a task: the original
/// variable, its private copy, and (when present) the element initializer.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;
  const VarDecl *PrivateCopy;
  const VarDecl *PrivateElemInit;
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Copy any alignment attributes of the original variable to the
        // corresponding field of the privates record.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  // The privates field is only present if the task has private variables.
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // The proxy takes (gtid, kmp_task_t_with_privates *restrict).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base points at the embedded kmp_task_t (first field of the record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field is optional; pass null when the task has none.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally pass lb, ub, st, liter and reductions loaded
    // from the kmp_task_t record.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The task entry always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  // The destructor thunk takes (gtid, kmp_task_t_with_privates *restrict).
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
Args.push_back(&GtidArg); 4683 Args.push_back(&TaskTypeArg); 4684 const auto &DestructorFnInfo = 4685 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4686 llvm::FunctionType *DestructorFnTy = 4687 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4688 std::string Name = 4689 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4690 auto *DestructorFn = 4691 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4692 Name, &CGM.getModule()); 4693 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4694 DestructorFnInfo); 4695 DestructorFn->setDoesNotRecurse(); 4696 CodeGenFunction CGF(CGM); 4697 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4698 Args, Loc, Loc); 4699 4700 LValue Base = CGF.EmitLoadOfPointerLValue( 4701 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4702 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4703 const auto *KmpTaskTWithPrivatesQTyRD = 4704 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4705 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4706 Base = CGF.EmitLValueForField(Base, *FI); 4707 for (const auto *Field : 4708 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4709 if (QualType::DestructionKind DtorKind = 4710 Field->getType().isDestructedType()) { 4711 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4712 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4713 } 4714 } 4715 CGF.FinishFunction(); 4716 return DestructorFn; 4717 } 4718 4719 /// Emit a privates mapping function for correct handling of private and 4720 /// firstprivate variables. 4721 /// \code 4722 /// void .omp_task_privates_map.(const .privates. 
/// *noalias privs, <ty1>
/// **noalias priv1,..., <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameter 0 is the privates record itself.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each original variable to the index of its output pointer parameter,
  // in the order private, firstprivate, lastprivate.
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  // The mapping function only stores field addresses; when optimizing, strip
  // the no-inline attributes so it can always be inlined.
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    // Privates[Counter] describes the private variable behind *Field;
    // PrivateVarsPos recovers the matching output parameter.
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates record is the second field of kmp_task_t_with_privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the privates record fields in parallel with the Privates list (they
  // were created in the same order).
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In task_dup (ForDup) only privates with a non-trivial C++ construction
    // need reinitialization here.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize the copy from the captured original value.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Re-derive the lvalue with the original declaration's alignment
          // and TBAA info.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: evaluate the init expression with the
          // source element temporarily privatized.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: emit its initializer in place.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // Only a non-trivial C++ construction of a private copy forces emission
    // of the task duplication function.
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer from the *source* task so that firstprivates
    // are copied from the original task's captured values.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // Only the privates record (the second field) can contain members that
  // need destruction.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally record the element-init variable used to
  // reference the source element during initialization.
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Stable sort by decreasing alignment (the comparator orders on the
  // alignment stored in .first).
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop and plain task/target directives use separately cached record
  // types since the taskloop variant carries extra fields.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th parameter of the outlined task
  // function; take its LLVM type for the cast below.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The 'final' clause condition may be dynamic (a pointer to an emitted
  // value), so the flag may have to be selected at run time.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // Cast the opaque task pointer returned by the runtime to the concrete
  // kmp_task_t_with_privates type for this task.
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops need a task_dup function when lastprivates exist or any
    // private copy requires non-trivial construction.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    // The flags field is an unsigned integer with the bit width of 'bool'.
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build (or reuse the cached) kmp_depend_info record type:
    // { intptr_t base_addr; size_t len; flags_ty flags; }.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // For an array section the length in bytes is computed as
        // (&upper_bound_element + 1) - &lower_bound_element.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a pointer to its first element for the runtime call.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch of the if clause: enqueue the task normally.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'else' branch: execute the task body immediately (undeferred task),
  // bracketed by the if0 begin/complete runtime calls.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data)
{
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower/upper bound and stride fields of the task record
  // from the initializers of the captured loop-bound variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Schedule kind: grainsize/num_tasks clause presence selects the mode.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Skip the loop entirely when the array is empty (begin == end).
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers across
  // loop iterations; the second incoming value is wired up below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current elements so the reduction
  // generator operates on one element at a time.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Close the PHI cycle with the advanced pointers from the current block.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // A call through an OpaqueValueExpr whose source references an
  // OMPDeclareReductionDecl is a user-defined reduction: rebind the callee to
  // the previously emitted combiner function and emit the call.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Otherwise the combiner is a plain expression; emit it for side effects.
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Remap each LHS/RHS variable onto the matching slot of the argument
  // arrays. Idx can run ahead of I because each VLA item consumes an extra
  // slot holding its size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // Bind the VLA's opaque size expression to the value loaded from the
      // extra array slot before re-emitting the variably-modified type.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit a combiner for every reduction item.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime support needed: emit the combiners directly.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      // The size is smuggled through the void* slot as an inttoptr value.
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // Wrap the combiners so the end-reduce call is emitted on region exit.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose 'x = <update expr>' into its pieces for atomic emission.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // For the generic fallback, materialize the old value of x in
                // a temporary remapped onto VD and re-evaluate the update.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Locals/params keep their plain name; globals use the mangled name so the
  // generated identifier stays unique across translation units.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return Out.str();
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = load the private item pointer from the void* argument.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // No original item is needed: use a null placeholder.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The LHS/RHS variables of the combiner expression are remapped onto the
  // two void* function arguments below.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed when the reduction item requires no cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy =
      CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // %0 = load the private item pointer from the void* argument.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}

/// Emits the task-reduction setup: builds the kmp_task_red_input_t array
/// describing every reduction item and calls __kmpc_task_reduction_init.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  // void *reduce_shar; // shared reduction item
  // size_t reduce_size; // size of data item
  // void *reduce_init; // data initialization routine
  // void *reduce_fini; // data finalization routine
  // void *reduce_comb; // data combiner routine
  // kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  // Fill one kmp_task_red_input_t element per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second = nullptr).
6370 if (Sizes.second) { 6371 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6372 /*isSigned=*/false); 6373 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6374 CGF, CGM.getContext().getSizeType(), 6375 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6376 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6377 } 6378 // Store address of the original reduction item if custom initializer is used. 6379 if (RCG.usesReductionInitializer(N)) { 6380 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6381 CGF, CGM.getContext().VoidPtrTy, 6382 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6383 CGF.Builder.CreateStore( 6384 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6385 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 6386 SharedAddr, /*IsVolatile=*/false); 6387 } 6388 } 6389 6390 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6391 SourceLocation Loc, 6392 llvm::Value *ReductionsPtr, 6393 LValue SharedLVal) { 6394 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6395 // *d); 6396 llvm::Value *Args[] = { 6397 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6398 /*isSigned=*/true), 6399 ReductionsPtr, 6400 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 6401 CGM.VoidPtrTy)}; 6402 return Address( 6403 CGF.EmitRuntimeCall( 6404 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6405 SharedLVal.getAlignment()); 6406 } 6407 6408 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6409 SourceLocation Loc) { 6410 if (!CGF.HaveInsertPoint()) 6411 return; 6412 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6413 // global_tid); 6414 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6415 // Ignore return result until untied tasks are supported. 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  // Let untied task regions re-enter after the taskwait, if applicable.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit the body of an inlined (non-outlined) OpenMP directive. Temporarily
/// installs an InlinedOpenMPRegionRAII so the region's codegen callback runs
/// with the proper captured-statement info, then emits the body in place.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Numeric cancellation kinds passed as the 'cncl_kind' argument to
/// __kmpc_cancel / __kmpc_cancellationpoint (see the calls below); the
/// explicit values must stay in sync with the OpenMP runtime.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Map the cancel region directive kind to the runtime's numeric cancel kind.
/// Only parallel/for/sections/taskgroup are valid here (asserted).
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emit code for '#pragma omp cancellation point': call the runtime and, if it
/// reports a pending cancellation, branch out of the construct through the
/// active cleanups.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  // Only emitted when inside a recognized OpenMP region; otherwise the
  // cancellation point is a no-op.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      // Branch out of the construct (through any active cleanups) when the
      // runtime reports that a cancellation was triggered.
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    // An 'if' clause guards the cancel: emit it conditionally; otherwise emit
    // the cancel unconditionally.
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// Emit the outlined function for a target region and record that at least one
/// target region was emitted (HasEmittedTargetRegion); the actual work is done
/// by emitTargetOutlinedFunctionHelper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Generate the outlined function for a target region, compute its unique
/// entry name and region ID, and (for offload entries) register it with the
/// offload entries manager.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Outline the captured statement into a fresh function using a dedicated
  // CodeGenFunction with target-region captured-statement info.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: a unique zero-initialized byte serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Strip containers and skip "insignificant" statements (trivial expressions,
/// asm, null statements, certain OpenMP directives, codegen-free declarations)
/// to find the single meaningful child of \p Body; returns nullptr if there is
/// more than one.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is insignificant only if every declaration in it
        // produces no code: type/pragma/OpenMP metadata declarations, or
        // variables that are constexpr / trivially-typed with at most a
        // trivial initializer.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the single significant child, stripping containers again.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': look for a teams construct nested directly inside.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the num_teams expression in the captured-statement
          // context of the target region.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams without num_teams: let the runtime choose (0).
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No analyzable single child: number of teams is unknown here.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the num_teams clause (if any) is on D itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct possible: exactly one team.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based execution directives;
  // reaching them here would be a bug (asserted above, unreachable below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

static llvm::Value
*getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
               llvm::Value *DefaultThreadLimitVal) {
  // Inspect the single significant child of the captured statement to find a
  // nested parallel (or simd) directive and derive its thread count,
  // clamped by DefaultThreadLimitVal when that is non-null.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the 'if' clause that applies to parallel (unmodified or with
        // the 'parallel' name modifier).
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Condition folds to false at compile time: serialized parallel,
            // exactly one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations (captured helper
            // variables) before evaluating the condition.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  // No-init capture: allocate storage without emitting an
                  // initializer.
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit the clause's pre-init declarations before evaluating the
        // num_threads expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the caller-provided thread limit:
        // min(DefaultThreadLimitVal, NumThreads).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        // No num_threads clause: use the limit if given, else 0 (runtime
        // default).
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    // A nested simd region executes with a single thread.
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': derive the thread count from the nested construct.
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested teams construct may carry a thread_limit clause.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations before evaluating the
        // thread_limit expression.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams (non-distribute) construct, descend one more level to
      // inspect its nested construct.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // A nested distribute (non-simd) construct: recurse into it for the
      // thread count, clamped by any thread_limit found above.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      // A nested simd construct runs single-threaded.
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // Combined target+teams: thread_limit (if any) is on D itself.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Look through a nested plain 'distribute' for the thread count.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the 'if' clause that applies to parallel (unmodified or with the
      // 'parallel' name modifier).
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Compile-time false: serialized parallel, one thread.
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective limit is min(num_threads, thread_limit) when both present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    // Neither clause present: 0 lets the runtime choose.
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    // Simd-only target regions run single-threaded.
    return Bld.getInt32(1);
  // All remaining directive kinds are not target-based execution directives;
  // reaching them here would be a bug (asserted above, unreachable below).
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case
OMPD_declare_simd: 7079 case OMPD_declare_variant: 7080 case OMPD_declare_target: 7081 case OMPD_end_declare_target: 7082 case OMPD_declare_reduction: 7083 case OMPD_declare_mapper: 7084 case OMPD_taskloop: 7085 case OMPD_taskloop_simd: 7086 case OMPD_requires: 7087 case OMPD_unknown: 7088 break; 7089 } 7090 llvm_unreachable("Unsupported directive kind."); 7091 } 7092 7093 namespace { 7094 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7095 7096 // Utility to handle information from clauses associated with a given 7097 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7098 // It provides a convenient interface to obtain the information and generate 7099 // code for that information. 7100 class MappableExprsHandler { 7101 public: 7102 /// Values for bit flags used to specify the mapping type for 7103 /// offloading. 7104 enum OpenMPOffloadMappingFlags : uint64_t { 7105 /// No flags 7106 OMP_MAP_NONE = 0x0, 7107 /// Allocate memory on the device and move data from host to device. 7108 OMP_MAP_TO = 0x01, 7109 /// Allocate memory on the device and move data from device to host. 7110 OMP_MAP_FROM = 0x02, 7111 /// Always perform the requested mapping action on the element, even 7112 /// if it was already mapped before. 7113 OMP_MAP_ALWAYS = 0x04, 7114 /// Delete the element from the device environment, ignoring the 7115 /// current reference count associated with the element. 7116 OMP_MAP_DELETE = 0x08, 7117 /// The element being mapped is a pointer-pointee pair; both the 7118 /// pointer and the pointee should be mapped. 7119 OMP_MAP_PTR_AND_OBJ = 0x10, 7120 /// This flags signals that the base address of an entry should be 7121 /// passed to the target kernel as an argument. 7122 OMP_MAP_TARGET_PARAM = 0x20, 7123 /// Signal that the runtime library has to return the device pointer 7124 /// in the current position for the data being mapped. Used when we have the 7125 /// use_device_ptr clause. 
7126 OMP_MAP_RETURN_PARAM = 0x40, 7127 /// This flag signals that the reference being passed is a pointer to 7128 /// private data. 7129 OMP_MAP_PRIVATE = 0x80, 7130 /// Pass the element to the device by value. 7131 OMP_MAP_LITERAL = 0x100, 7132 /// Implicit map 7133 OMP_MAP_IMPLICIT = 0x200, 7134 /// Close is a hint to the runtime to allocate memory close to 7135 /// the target device. 7136 OMP_MAP_CLOSE = 0x400, 7137 /// The 16 MSBs of the flags indicate whether the entry is member of some 7138 /// struct/class. 7139 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7140 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7141 }; 7142 7143 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7144 static unsigned getFlagMemberOffset() { 7145 unsigned Offset = 0; 7146 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7147 Remain = Remain >> 1) 7148 Offset++; 7149 return Offset; 7150 } 7151 7152 /// Class that associates information with a base pointer to be passed to the 7153 /// runtime library. 7154 class BasePointerInfo { 7155 /// The base pointer. 7156 llvm::Value *Ptr = nullptr; 7157 /// The base declaration that refers to this device pointer, or null if 7158 /// there is none. 7159 const ValueDecl *DevPtrDecl = nullptr; 7160 7161 public: 7162 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7163 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7164 llvm::Value *operator*() const { return Ptr; } 7165 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7166 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7167 }; 7168 7169 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7170 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7171 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7172 7173 /// Map between a struct and the its lowest & highest elements which have been 7174 /// mapped. 
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    /// Field index and address of the lowest struct member mapped so far.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    /// Field index and address of the highest struct member mapped so far.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    /// Address of the whole struct; invalid until the first member is
    /// recorded.
    Address Base = Address::invalid();
  };

private:
  /// Information gathered for one component list of a map/to/from clause:
  /// the components themselves plus the map type, its modifiers, whether the
  /// runtime must return a device pointer for it, and whether the map was
  /// implicit.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The deferred initializer expression.
    const Expr *IE = nullptr;
    /// The declaration the entry refers to.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Return the number of bytes that have to be mapped for expression \a E.
  /// For an array section this is section length times element size; for
  /// everything else it is the size of the (non-reference) type of \a E.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression, that means we
      // are using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength())
        return ElemSize;

      llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
      LengthVal =
          CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
      return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
        != MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
        != MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refer to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size of more than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // Array subscript or section whose base is 'this'.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing region from the end of the last overlapped
          // element to the end of the struct.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Return the MEMBER_OF flag encoding the given 0-based \a Position (stored
  /// 1-based in the 16 MSBs of the flags).
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF placeholder in \a Flags with \a MemberOfFlag,
  /// except for PTR_AND_OBJ entries that do not carry the placeholder.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Append to \a Layout the non-empty bases (recursively flattened) and
  /// non-bitfield, non-zero-size fields of \a RD in LLVM field order.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Flatten the collected bases and fields, in layout order, into the
    // output list.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  /// Constructor for an executable directive: gathers firstprivate and
  /// is_device_ptr clause information up front.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element).
    // Both ends are cast to i8* so the pointer difference is in bytes.
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element: the combined entry
    // just appended is now the target parameter for this struct.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    // MemberOfFlag is derived from the index of the combined entry appended
    // above (BasePointers.size() - 1).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
7962 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7963 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7964 MapFlagsArrayTy &Types) const { 7965 // We have to process the component lists that relate with the same 7966 // declaration in a single chunk so that we can generate the map flags 7967 // correctly. Therefore, we organize all lists in a map. 7968 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7969 7970 // Helper function to fill the information map for the different supported 7971 // clauses. 7972 auto &&InfoGen = [&Info]( 7973 const ValueDecl *D, 7974 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7975 OpenMPMapClauseKind MapType, 7976 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7977 bool ReturnDevicePointer, bool IsImplicit) { 7978 const ValueDecl *VD = 7979 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7980 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7981 IsImplicit); 7982 }; 7983 7984 assert(CurDir.is<const OMPExecutableDirective *>() && 7985 "Expect a executable directive"); 7986 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 7987 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 7988 for (const auto &L : C->component_lists()) { 7989 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7990 /*ReturnDevicePointer=*/false, C->isImplicit()); 7991 } 7992 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 7993 for (const auto &L : C->component_lists()) { 7994 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7995 /*ReturnDevicePointer=*/false, C->isImplicit()); 7996 } 7997 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 7998 for (const auto &L : C->component_lists()) { 7999 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 8000 /*ReturnDevicePointer=*/false, C->isImplicit()); 8001 } 8002 8003 // Look at the use_device_ptr clause information and mark the existing map 8004 // 
entries as such. If there is no map information for an entry in the 8005 // use_device_ptr list, we create one with map type 'alloc' and zero size 8006 // section. It is the user fault if that was not mapped before. If there is 8007 // no map information and the pointer is a struct member, then we defer the 8008 // emission of that entry until the whole struct has been processed. 8009 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 8010 DeferredInfo; 8011 8012 for (const auto *C : 8013 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 8014 for (const auto &L : C->component_lists()) { 8015 assert(!L.second.empty() && "Not expecting empty list of components!"); 8016 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 8017 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8018 const Expr *IE = L.second.back().getAssociatedExpression(); 8019 // If the first component is a member expression, we have to look into 8020 // 'this', which maps to null in the map of map information. Otherwise 8021 // look directly for the information. 8022 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8023 8024 // We potentially have map information for this declaration already. 8025 // Look for the first set of components that refer to it. 8026 if (It != Info.end()) { 8027 auto CI = std::find_if( 8028 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 8029 return MI.Components.back().getAssociatedDeclaration() == VD; 8030 }); 8031 // If we found a map entry, signal that the pointer has to be returned 8032 // and move on to the next declaration. 8033 if (CI != It->second.end()) { 8034 CI->ReturnDevicePointer = true; 8035 continue; 8036 } 8037 } 8038 8039 // We didn't find any match in our map information - generate a zero 8040 // size array section - if the pointer is a struct member we defer this 8041 // action until the whole struct has been processed. 
8042 if (isa<MemberExpr>(IE)) { 8043 // Insert the pointer into Info to be processed by 8044 // generateInfoForComponentList. Because it is a member pointer 8045 // without a pointee, no entry will be generated for it, therefore 8046 // we need to generate one after the whole struct has been processed. 8047 // Nonetheless, generateInfoForComponentList must be called to take 8048 // the pointer into account for the calculation of the range of the 8049 // partial struct. 8050 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8051 /*ReturnDevicePointer=*/false, C->isImplicit()); 8052 DeferredInfo[nullptr].emplace_back(IE, VD); 8053 } else { 8054 llvm::Value *Ptr = 8055 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8056 BasePointers.emplace_back(Ptr, VD); 8057 Pointers.push_back(Ptr); 8058 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8059 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8060 } 8061 } 8062 } 8063 8064 for (const auto &M : Info) { 8065 // We need to know when we generate information for the first component 8066 // associated with a capture, because the mapping flags depend on it. 8067 bool IsFirstComponentList = true; 8068 8069 // Temporary versions of arrays 8070 MapBaseValuesArrayTy CurBasePointers; 8071 MapValuesArrayTy CurPointers; 8072 MapValuesArrayTy CurSizes; 8073 MapFlagsArrayTy CurTypes; 8074 StructRangeInfoTy PartialStruct; 8075 8076 for (const MapInfo &L : M.second) { 8077 assert(!L.Components.empty() && 8078 "Not expecting declaration with no component lists."); 8079 8080 // Remember the current base pointer index. 8081 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8082 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8083 CurBasePointers, CurPointers, CurSizes, 8084 CurTypes, PartialStruct, 8085 IsFirstComponentList, L.IsImplicit); 8086 8087 // If this entry relates with a device pointer, set the relevant 8088 // declaration and add the 'return pointer' flag. 
8089 if (L.ReturnDevicePointer) { 8090 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8091 "Unexpected number of mapped base pointers."); 8092 8093 const ValueDecl *RelevantVD = 8094 L.Components.back().getAssociatedDeclaration(); 8095 assert(RelevantVD && 8096 "No relevant declaration related with device pointer??"); 8097 8098 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8099 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8100 } 8101 IsFirstComponentList = false; 8102 } 8103 8104 // Append any pending zero-length pointers which are struct members and 8105 // used with use_device_ptr. 8106 auto CI = DeferredInfo.find(M.first); 8107 if (CI != DeferredInfo.end()) { 8108 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8109 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); 8110 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8111 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8112 CurBasePointers.emplace_back(BasePtr, L.VD); 8113 CurPointers.push_back(Ptr); 8114 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8115 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8116 // value MEMBER_OF=FFFF so that the entry is later updated with the 8117 // correct value of MEMBER_OF. 8118 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8119 OMP_MAP_MEMBER_OF); 8120 } 8121 } 8122 8123 // If there is an entry in PartialStruct it means we have a struct with 8124 // individual members mapped. Emit an extra combined entry. 8125 if (PartialStruct.Base.isValid()) 8126 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8127 PartialStruct); 8128 8129 // We need to append the results of this capture to what we already have. 
8130 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8131 Pointers.append(CurPointers.begin(), CurPointers.end()); 8132 Sizes.append(CurSizes.begin(), CurSizes.end()); 8133 Types.append(CurTypes.begin(), CurTypes.end()); 8134 } 8135 } 8136 8137 /// Generate all the base pointers, section pointers, sizes and map types for 8138 /// the extracted map clauses of user-defined mapper. 8139 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8140 MapValuesArrayTy &Pointers, 8141 MapValuesArrayTy &Sizes, 8142 MapFlagsArrayTy &Types) const { 8143 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8144 "Expect a declare mapper directive"); 8145 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8146 // We have to process the component lists that relate with the same 8147 // declaration in a single chunk so that we can generate the map flags 8148 // correctly. Therefore, we organize all lists in a map. 8149 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8150 8151 // Helper function to fill the information map for the different supported 8152 // clauses. 8153 auto &&InfoGen = [&Info]( 8154 const ValueDecl *D, 8155 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8156 OpenMPMapClauseKind MapType, 8157 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8158 bool ReturnDevicePointer, bool IsImplicit) { 8159 const ValueDecl *VD = 8160 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8161 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8162 IsImplicit); 8163 }; 8164 8165 for (const auto *C : CurMapperDir->clauselists()) { 8166 const auto *MC = cast<OMPMapClause>(C); 8167 for (const auto &L : MC->component_lists()) { 8168 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8169 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8170 } 8171 } 8172 8173 for (const auto &M : Info) { 8174 // We need to know when we generate information for the first component 8175 // associated with a capture, because the mapping flags depend on it. 8176 bool IsFirstComponentList = true; 8177 8178 // Temporary versions of arrays 8179 MapBaseValuesArrayTy CurBasePointers; 8180 MapValuesArrayTy CurPointers; 8181 MapValuesArrayTy CurSizes; 8182 MapFlagsArrayTy CurTypes; 8183 StructRangeInfoTy PartialStruct; 8184 8185 for (const MapInfo &L : M.second) { 8186 assert(!L.Components.empty() && 8187 "Not expecting declaration with no component lists."); 8188 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8189 CurBasePointers, CurPointers, CurSizes, 8190 CurTypes, PartialStruct, 8191 IsFirstComponentList, L.IsImplicit); 8192 IsFirstComponentList = false; 8193 } 8194 8195 // If there is an entry in PartialStruct it means we have a struct with 8196 // individual members mapped. Emit an extra combined entry. 8197 if (PartialStruct.Base.isValid()) 8198 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8199 PartialStruct); 8200 8201 // We need to append the results of this capture to what we already have. 8202 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8203 Pointers.append(CurPointers.begin(), CurPointers.end()); 8204 Sizes.append(CurSizes.begin(), CurSizes.end()); 8205 Types.append(CurTypes.begin(), CurTypes.end()); 8206 } 8207 } 8208 8209 /// Emit capture info for lambdas for variables captured by reference. 
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    // Only lambda closures are handled here; anything else is a no-op.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Map the captured 'this' pointer, if any.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(ThisLVal.getPointer());
      Pointers.push_back(ThisLValVal.getPointer());
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // NOTE: intentionally shadows the enclosing VD parameter.
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need mapping.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarLValVal.getPointer());
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy captured pointer: map the pointer value itself, size 0.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
        BasePointers.push_back(VarLVal.getPointer());
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      // Only entries with exactly the flag combination emitted by
      // generateInfoForLambdaCaptures are adjusted.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the entry whose pointer is the lambda itself.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // A 'this' capture is keyed as a null declaration; otherwise use the
    // canonical declaration of the captured variable.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect every map-clause component list that refers to this capture.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Compare every pair of component lists; walk both from the innermost
    // component outwards until they diverge.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The shorter list is the base; the longer one overlaps it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order overlapped component lists by field position in the record
      // layout so shorter/earlier sections come first.
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            // Fields of different (base) records: whichever appears first in
            // the plain layout is less.
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated with the declare target link variables.
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->component_lists()) {
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        // Skip anything that is not an MT_Link declare-target variable, and
        // skip everything under unified shared memory.
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointee object tofrom.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate: register a global copy and map that instead
        // of the original variable.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: the mapped pointer is the loaded pointee
          // address, not the reference itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Base pointers and pointers are always filled at runtime, so they get
    // stack temporaries.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType =
          Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
                                   /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store each base pointer, pointer and (if runtime-evaluated) size into
    // its slot of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a device-pointer declaration's slot lives, if requested.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    // No pointers to pass: hand null arrays to the runtime.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may wrap a 'teams distribute...' or a 'teams' that itself
      // wraps a 'distribute...'; look one or two levels down.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      // Only one level down: the distribute directive, if present.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms cannot legally contain a nested distribute.
      return nullptr;
    // All remaining kinds cannot reach here: this helper is only called for
    // target directives. The list is spelled out (no 'default:') so the
    // compiler warns when a new directive kind is added to the enum.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type) {
///   // Allocate space for an array section first.
///   if (size > 1 && !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // Each mapper declaration is emitted at most once; UDMMap caches it.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature matches
  // the \code example above: (handle, base, begin, size, type).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Name the function after the mangled mapped type and the mapper id so
  // different mappers for the same type do not collide.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Capture the actual predecessor block for the PHI below (the guard branch
  // may have been emitted into a block other than HeadBB).
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  // Loop induction variable: the current element pointer. Its back-edge
  // incoming value (PtrNext) is wired in after the body is emitted.
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
  // Shift the count into the MEMBER_OF bit-field position so it can be added
  // directly to a map-type word.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing (falls through from ToElseBB to EndBB).
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  // Close the loop: wire the back-edge incoming value of the induction PHI.
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the emitted function; also record it per enclosing function when
  // one is provided so it can be referenced from there.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section (more than one element).
  llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete");
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix);
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section. The DELETE bit must be
  // clear for init, set for delete; otherwise skip to ExitBB.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, "omp.array" + Prefix + ".delete");
  } else {
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, "omp.array" + Prefix + ".delete");
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
    const llvm::function_ref<llvm::Value *(
        CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. If D itself is not a
  // combined teams+distribute directive, look for one nested inside it.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    llvm::Value *NumIterations = SizeEmitter(CGF, *LD);

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

    // Let the runtime know the trip count of the nested distribute loop.
    llvm::Value *Args[] = {DeviceID, NumIterations};
    CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A 'depend' clause forces the target region to be wrapped in an outer
  // task; captured vars then have to be regenerated inside it.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required. A
    // non-zero return value means the offload failed.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      // Inside the outer task the captures must be re-generated in the task
      // context before calling the host fallback.
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the captured-record fields and the captured values
    // in lockstep: the three sequences are parallel by construction.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
9488 bool RequiresDeviceCodegen = 9489 isa<OMPExecutableDirective>(S) && 9490 isOpenMPTargetExecutionDirective( 9491 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9492 9493 if (RequiresDeviceCodegen) { 9494 const auto &E = *cast<OMPExecutableDirective>(S); 9495 unsigned DeviceID; 9496 unsigned FileID; 9497 unsigned Line; 9498 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9499 FileID, Line); 9500 9501 // Is this a target region that should not be emitted as an entry point? If 9502 // so just signal we are done with this target region. 9503 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9504 ParentName, Line)) 9505 return; 9506 9507 switch (E.getDirectiveKind()) { 9508 case OMPD_target: 9509 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9510 cast<OMPTargetDirective>(E)); 9511 break; 9512 case OMPD_target_parallel: 9513 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9514 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9515 break; 9516 case OMPD_target_teams: 9517 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9518 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9519 break; 9520 case OMPD_target_teams_distribute: 9521 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9522 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9523 break; 9524 case OMPD_target_teams_distribute_simd: 9525 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9526 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9527 break; 9528 case OMPD_target_parallel_for: 9529 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9530 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9531 break; 9532 case OMPD_target_parallel_for_simd: 9533 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9534 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9535 break; 9536 case OMPD_target_simd: 9537 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9538 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9539 break; 9540 case OMPD_target_teams_distribute_parallel_for: 9541 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9542 CGM, ParentName, 9543 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9544 break; 9545 case OMPD_target_teams_distribute_parallel_for_simd: 9546 CodeGenFunction:: 9547 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9548 CGM, ParentName, 9549 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9550 break; 9551 case OMPD_parallel: 9552 case OMPD_for: 9553 case OMPD_parallel_for: 9554 case OMPD_parallel_sections: 9555 case OMPD_for_simd: 9556 case OMPD_parallel_for_simd: 9557 case OMPD_cancel: 9558 case OMPD_cancellation_point: 9559 case OMPD_ordered: 9560 case OMPD_threadprivate: 9561 case OMPD_allocate: 9562 case OMPD_task: 9563 case OMPD_simd: 9564 case OMPD_sections: 9565 case OMPD_section: 9566 case OMPD_single: 9567 case OMPD_master: 9568 case OMPD_critical: 9569 case OMPD_taskyield: 9570 case OMPD_barrier: 9571 case OMPD_taskwait: 9572 case OMPD_taskgroup: 9573 case OMPD_atomic: 9574 case OMPD_flush: 9575 case OMPD_teams: 9576 case OMPD_target_data: 9577 case OMPD_target_exit_data: 9578 case OMPD_target_enter_data: 9579 case OMPD_distribute: 9580 case OMPD_distribute_simd: 9581 case OMPD_distribute_parallel_for: 9582 case OMPD_distribute_parallel_for_simd: 9583 case OMPD_teams_distribute: 9584 case OMPD_teams_distribute_simd: 9585 case OMPD_teams_distribute_parallel_for: 9586 case OMPD_teams_distribute_parallel_for_simd: 9587 case OMPD_target_update: 9588 case OMPD_declare_simd: 9589 case OMPD_declare_variant: 9590 case OMPD_declare_target: 9591 case OMPD_end_declare_target: 9592 case OMPD_declare_reduction: 9593 case OMPD_declare_mapper: 9594 case OMPD_taskloop: 9595 case OMPD_taskloop_simd: 9596 case OMPD_requires: 9597 case OMPD_unknown: 9598 llvm_unreachable("Unknown target 
directive for OpenMP device codegen."); 9599 } 9600 return; 9601 } 9602 9603 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9604 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9605 return; 9606 9607 scanForTargetRegionsFunctions( 9608 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9609 return; 9610 } 9611 9612 // If this is a lambda function, look into its body. 9613 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9614 S = L->getBody(); 9615 9616 // Keep looking for target regions recursively. 9617 for (const Stmt *II : S->children()) 9618 scanForTargetRegionsFunctions(II, ParentName); 9619 } 9620 9621 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9622 // If emitting code for the host, we do not process FD here. Instead we do 9623 // the normal code generation. 9624 if (!CGM.getLangOpts().OpenMPIsDevice) { 9625 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9626 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9627 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9628 // Do not emit device_type(nohost) functions for the host. 9629 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9630 return true; 9631 } 9632 return false; 9633 } 9634 9635 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9636 StringRef Name = CGM.getMangledName(GD); 9637 // Try to detect target regions in the function. 9638 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9639 scanForTargetRegionsFunctions(FD->getBody(), Name); 9640 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9641 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9642 // Do not emit device_type(nohost) functions for the host. 9643 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9644 return true; 9645 } 9646 9647 // Do not to emit function if it is not marked as declare target. 
  // Do not emit the function unless it is marked declare target or has not
  // been emitted for the device yet.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetFunctions.count(Name) == 0;
}

// Decide whether codegen of a global variable should be intercepted on the
// device. Returns true when the normal host-style emission must be skipped
// (the variable is deferred or handled via declare-target machinery).
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target. 'link'
  // variables, and 'to' variables under unified shared memory, are deferred
  // until emitDeferredTargetDecls.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

// Create (or reuse) an internal global holding the device copy of a constant
// firstprivate variable, register it as an offload entry, and return its
// address. The name encodes device/file IDs and line so it is unique per
// translation unit location.
llvm::Constant *
CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF,
                                                const VarDecl *VD) {
  assert(VD->getType().isConstant(CGM.getContext()) &&
         "Expected constant variable.");
  StringRef VarName;
  llvm::Constant *Addr;
  llvm::GlobalValue::LinkageTypes Linkage;
  QualType Ty = VD->getType();
  SmallString<128> Buffer;
  {
    // Build a unique name: __omp_offloading_firstprivate_<dev>_<file>_<var>_l<line>.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID,
                             FileID, Line);
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
    VarName = OS.str();
  }
  Linkage = llvm::GlobalValue::InternalLinkage;
  Addr =
      getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName,
                                  getDefaultFirstprivateAddressSpace());
  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
  // Keep the variable alive past optimizations; it is referenced through the
  // offload entry table rather than from IR uses.
  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize,
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage);
  return Addr;
}

// Record a global variable in the offload entries table (or in the
// non-target-variable map on the device) so host and device agree on its
// name, size, and linkage.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  CharUnits VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    // 'declare target to' without unified shared memory: the variable itself
    // is the entry. Declarations get size zero.
    Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
      assert(!VarSize.isZero() && "Expected non-zero size of the variable");
    } else {
      VarSize = CharUnits::Zero();
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Emit an internal "<name>_ref" constant pointing at the variable and
      // mark it compiler-used so the variable cannot be dropped on the device.
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    // 'link', or 'to' with unified shared memory: register the pointer-sized
    // link/indirection entry instead of the variable itself.
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
    else
      Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      // On the device the entry is registered by name only.
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}

// Dispatch device-side interception for a global: functions and declare
// reduction decls go through emitTargetFunctions, everything else is treated
// as a variable.
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

// Emit the declare-target variables that were deferred by
// emitTargetGlobalVariable: 'to' variables are emitted directly, 'link' (or
// 'to' + unified shared memory) variables get their indirection pointer.
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      // Creating the address is enough; the value is intentionally unused.
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

// Default (host) implementation: nothing to adjust for lambdas; device
// runtimes override this. Only the directive-kind sanity check remains.
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

// Scan a 'requires' directive and latch whether unified shared memory was
// requested; the flag changes how declare-target variables are registered.
void CGOpenMPRuntime::checkArchForUnifiedAddressing(
    const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      break;
    }
  }
}

// If VD carries an 'omp allocate' attribute with a predefined allocator,
// report the language address space the global should live in via AS and
// return true. Returns false when no allocate attribute is present.
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch(A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  // All predefined allocators map to the default address space in this base
  // implementation (device runtimes may override with specific spaces).
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

// Query whether a '#pragma omp requires unified_shared_memory' was seen.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

// RAII that temporarily disables the automatic "mark as global target"
// behavior during device compilation; restores the previous state on scope
// exit.
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

// Decide whether a function should be emitted during device codegen.
// Returns true when the function must NOT be (re-)emitted. Also records the
// mangled name of newly seen non-declare-target functions so each is emitted
// only once.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  StringRef Name = CGM.getMangledName(GD);
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
      // Emit unless we already have a definition in the module.
      if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // insert() returns {it, inserted}; a failed insert means it was already
  // emitted, so skip it.
  return !AlreadyEmittedTargetFunctions.insert(Name).second;
}

// Build the host-side constructor-like function that registers the 'requires'
// flags with the offload runtime (__tgt_register_requires). Returns nullptr
// when no registration is needed (device compile, simd-only mode, no target
// triples, or no offload entries/regions).
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This avoids the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion ||
            HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}

// Emit the module-level offload registration: first the offload entry
// metadata, then the registration function for the offloading binary
// descriptor.
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
  // If we have offloading in the current module, we need to emit the entries
  // now and register the offloading descriptor.
  createOffloadEntriesAndInfoMetadata();

  // Create and register the offloading binary descriptors. This is the main
  // entity that captures all the information about offloading in the current
  // compilation unit.
  return createOffloadingBinaryDescriptorRegistration();
}

// Emit a call to __kmpc_fork_teams that launches OutlinedFn for a 'teams'
// construct, forwarding the captured variables as trailing arguments.
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

// Emit __kmpc_push_num_teams for num_teams/thread_limit clauses; a null
// clause expression is lowered to 0 (runtime default).
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

// Emit the paired __tgt_target_data_begin/__tgt_target_data_end calls for a
// 'target data' region, handling the optional if/device clauses and device
// pointer privatization of the region body.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID, PointerNum, BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    // Rebuild the argument values from the Info captured at region open.
    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID, PointerNum, BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment (guarded by the if clause when present).
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment, mirroring the opening branch structure.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

// Emit a standalone data-movement directive (target enter data / exit data /
// update) as a single runtime call, selected per directive kind and nowait
// clause.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    // Number of mapped pointers to pass to the runtime.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Arguments for the standalone target-data runtime entry points:
    // device, number of items, and the four offloading arrays.
    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All remaining directive kinds must never reach this helper; listing
    // them explicitly (no default:) lets -Wswitch flag new directives.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  // Generates the offloading arrays from the map clauses and then either
  // wraps ThenGen in a target task (if there are depend clauses) or emits
  // it inline.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the generated arrays so ThenGen (captured by reference) can
    // read them when it runs.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // With an if() clause the runtime call is guarded; the else branch is a
  // no-op for these standalone data directives.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  // For Linear: the constant step; for LinearWithVarStride: the position of
  // the parameter holding the stride.
  llvm::APSInt StrideOrArg;
  // Alignment from the aligned() clause (or the default simd alignment).
  llvm::APSInt Alignment;
};
} // namespace

/// Compute the size in bits of the "characteristic data type" (CDT) of \p FD,
/// used to derive the vector length when no simdlen clause is given.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function`s "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    // The implicit 'this' of a method occupies parameter position 0, so
    // named parameters are shifted by one.
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      // Rule b): first non-uniform, non-linear (i.e. Vector) parameter.
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  // Rule c)/d): aggregates fall back to int.
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Emit the x86 vector-variant mangled names ("_ZGV<isa><mask><vlen>..._<fn>")
/// for \p Fn as function attributes, per the Intel vector function ABI.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  // ISA letter and the matching vector register width in bits.
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  // 'N' = not-masked variant, 'M' = masked variant.
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    // No [not]inbranch clause: emit both variants.
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No simdlen clause: derive VLEN from the CDT and register width.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      // Append one letter (plus optional stride/alignment) per parameter.
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (!!ParamAttr.StrideOrArg)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10420 10421 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10422 /// 10423 /// TODO: Need to implement the behavior for reference marked with a 10424 /// var or no linear modifiers (1.b in the section). For this, we 10425 /// need to extend ParamKindTy to support the linear modifiers. 10426 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10427 QT = QT.getCanonicalType(); 10428 10429 if (QT->isVoidType()) 10430 return false; 10431 10432 if (Kind == ParamKindTy::Uniform) 10433 return false; 10434 10435 if (Kind == ParamKindTy::Linear) 10436 return false; 10437 10438 // TODO: Handle linear references with modifiers 10439 10440 if (Kind == ParamKindTy::LinearWithVarStride) 10441 return false; 10442 10443 return true; 10444 } 10445 10446 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10447 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10448 QT = QT.getCanonicalType(); 10449 unsigned Size = C.getTypeSize(QT); 10450 10451 // Only scalars and complex within 16 bytes wide set PVB to true. 10452 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10453 return false; 10454 10455 if (QT->isFloatingType()) 10456 return true; 10457 10458 if (QT->isIntegerType()) 10459 return true; 10460 10461 if (QT->isPointerType()) 10462 return true; 10463 10464 // TODO: Add support for complex types (section 3.1.2, item 2). 10465 10466 return false; 10467 } 10468 10469 /// Computes the lane size (LS) of a return type or of an input parameter, 10470 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10471 /// TODO: Add support for references, section 3.2.1, item 1. 
10472 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10473 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10474 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10475 if (getAArch64PBV(PTy, C)) 10476 return C.getTypeSize(PTy); 10477 } 10478 if (getAArch64PBV(QT, C)) 10479 return C.getTypeSize(QT); 10480 10481 return C.getTypeSize(C.getUIntPtrType()); 10482 } 10483 10484 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10485 // signature of the scalar function, as defined in 3.2.2 of the 10486 // AAVFABI. 10487 static std::tuple<unsigned, unsigned, bool> 10488 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10489 QualType RetType = FD->getReturnType().getCanonicalType(); 10490 10491 ASTContext &C = FD->getASTContext(); 10492 10493 bool OutputBecomesInput = false; 10494 10495 llvm::SmallVector<unsigned, 8> Sizes; 10496 if (!RetType->isVoidType()) { 10497 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10498 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10499 OutputBecomesInput = true; 10500 } 10501 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10502 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10503 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10504 } 10505 10506 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10507 // The LS of a function parameter / return value can only be a power 10508 // of 2, starting from 8 bits, up to 128. 
10509 assert(std::all_of(Sizes.begin(), Sizes.end(), 10510 [](unsigned Size) { 10511 return Size == 8 || Size == 16 || Size == 32 || 10512 Size == 64 || Size == 128; 10513 }) && 10514 "Invalid size"); 10515 10516 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10517 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10518 OutputBecomesInput); 10519 } 10520 10521 /// Mangle the parameter part of the vector function name according to 10522 /// their OpenMP classification. The mangling function is defined in 10523 /// section 3.5 of the AAVFABI. 10524 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10525 SmallString<256> Buffer; 10526 llvm::raw_svector_ostream Out(Buffer); 10527 for (const auto &ParamAttr : ParamAttrs) { 10528 switch (ParamAttr.Kind) { 10529 case LinearWithVarStride: 10530 Out << "ls" << ParamAttr.StrideOrArg; 10531 break; 10532 case Linear: 10533 Out << 'l'; 10534 // Don't print the step value if it is not present or if it is 10535 // equal to 1. 10536 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10537 Out << ParamAttr.StrideOrArg; 10538 break; 10539 case Uniform: 10540 Out << 'u'; 10541 break; 10542 case Vector: 10543 Out << 'v'; 10544 break; 10545 } 10546 10547 if (!!ParamAttr.Alignment) 10548 Out << 'a' << ParamAttr.Alignment; 10549 } 10550 10551 return Out.str(); 10552 } 10553 10554 // Function used to add the attribute. The parameter `VLEN` is 10555 // templated to allow the use of "x" when targeting scalable functions 10556 // for SVE. 
10557 template <typename T> 10558 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10559 char ISA, StringRef ParSeq, 10560 StringRef MangledName, bool OutputBecomesInput, 10561 llvm::Function *Fn) { 10562 SmallString<256> Buffer; 10563 llvm::raw_svector_ostream Out(Buffer); 10564 Out << Prefix << ISA << LMask << VLEN; 10565 if (OutputBecomesInput) 10566 Out << "v"; 10567 Out << ParSeq << "_" << MangledName; 10568 Fn->addFnAttr(Out.str()); 10569 } 10570 10571 // Helper function to generate the Advanced SIMD names depending on 10572 // the value of the NDS when simdlen is not present. 10573 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10574 StringRef Prefix, char ISA, 10575 StringRef ParSeq, StringRef MangledName, 10576 bool OutputBecomesInput, 10577 llvm::Function *Fn) { 10578 switch (NDS) { 10579 case 8: 10580 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10581 OutputBecomesInput, Fn); 10582 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10583 OutputBecomesInput, Fn); 10584 break; 10585 case 16: 10586 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10587 OutputBecomesInput, Fn); 10588 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10589 OutputBecomesInput, Fn); 10590 break; 10591 case 32: 10592 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10593 OutputBecomesInput, Fn); 10594 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10595 OutputBecomesInput, Fn); 10596 break; 10597 case 64: 10598 case 128: 10599 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10600 OutputBecomesInput, Fn); 10601 break; 10602 default: 10603 llvm_unreachable("Scalar type is too wide."); 10604 } 10605 } 10606 10607 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10608 static void emitAArch64DeclareSimdFunction( 10609 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10610 ArrayRef<ParamAttrTy> ParamAttrs, 10611 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10612 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10613 10614 // Get basic data for building the vector signature. 10615 const auto Data = getNDSWDS(FD, ParamAttrs); 10616 const unsigned NDS = std::get<0>(Data); 10617 const unsigned WDS = std::get<1>(Data); 10618 const bool OutputBecomesInput = std::get<2>(Data); 10619 10620 // Check the values provided via `simdlen` by the user. 10621 // 1. A `simdlen(1)` doesn't produce vector signatures, 10622 if (UserVLEN == 1) { 10623 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10624 DiagnosticsEngine::Warning, 10625 "The clause simdlen(1) has no effect when targeting aarch64."); 10626 CGM.getDiags().Report(SLoc, DiagID); 10627 return; 10628 } 10629 10630 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10631 // Advanced SIMD output. 10632 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10633 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10634 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10635 "power of 2 when targeting Advanced SIMD."); 10636 CGM.getDiags().Report(SLoc, DiagID); 10637 return; 10638 } 10639 10640 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10641 // limits. 10642 if (ISA == 's' && UserVLEN != 0) { 10643 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10644 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10645 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10646 "lanes in the architectural constraints " 10647 "for SVE (min is 128-bit, max is " 10648 "2048-bit, by steps of 128-bit)"); 10649 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10650 return; 10651 } 10652 } 10653 10654 // Sort out parameter sequence. 
10655 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10656 StringRef Prefix = "_ZGV"; 10657 // Generate simdlen from user input (if any). 10658 if (UserVLEN) { 10659 if (ISA == 's') { 10660 // SVE generates only a masked function. 10661 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10662 OutputBecomesInput, Fn); 10663 } else { 10664 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10665 // Advanced SIMD generates one or two functions, depending on 10666 // the `[not]inbranch` clause. 10667 switch (State) { 10668 case OMPDeclareSimdDeclAttr::BS_Undefined: 10669 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10670 OutputBecomesInput, Fn); 10671 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10672 OutputBecomesInput, Fn); 10673 break; 10674 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10675 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10676 OutputBecomesInput, Fn); 10677 break; 10678 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10679 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10680 OutputBecomesInput, Fn); 10681 break; 10682 } 10683 } 10684 } else { 10685 // If no user simdlen is provided, follow the AAVFABI rules for 10686 // generating the vector length. 10687 if (ISA == 's') { 10688 // SVE, section 3.4.1, item 1. 10689 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10690 OutputBecomesInput, Fn); 10691 } else { 10692 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10693 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10694 // two vector names depending on the use of the clause 10695 // `[not]inbranch`. 
10696 switch (State) { 10697 case OMPDeclareSimdDeclAttr::BS_Undefined: 10698 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10699 OutputBecomesInput, Fn); 10700 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10701 OutputBecomesInput, Fn); 10702 break; 10703 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10704 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10705 OutputBecomesInput, Fn); 10706 break; 10707 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10708 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10709 OutputBecomesInput, Fn); 10710 break; 10711 } 10712 } 10713 } 10714 } 10715 10716 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10717 llvm::Function *Fn) { 10718 ASTContext &C = CGM.getContext(); 10719 FD = FD->getMostRecentDecl(); 10720 // Map params to their positions in function decl. 10721 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10722 if (isa<CXXMethodDecl>(FD)) 10723 ParamPositions.try_emplace(FD, 0); 10724 unsigned ParamPos = ParamPositions.size(); 10725 for (const ParmVarDecl *P : FD->parameters()) { 10726 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10727 ++ParamPos; 10728 } 10729 while (FD) { 10730 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10731 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10732 // Mark uniform parameters. 10733 for (const Expr *E : Attr->uniforms()) { 10734 E = E->IgnoreParenImpCasts(); 10735 unsigned Pos; 10736 if (isa<CXXThisExpr>(E)) { 10737 Pos = ParamPositions[FD]; 10738 } else { 10739 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10740 ->getCanonicalDecl(); 10741 Pos = ParamPositions[PVD]; 10742 } 10743 ParamAttrs[Pos].Kind = Uniform; 10744 } 10745 // Get alignment info. 
10746 auto NI = Attr->alignments_begin(); 10747 for (const Expr *E : Attr->aligneds()) { 10748 E = E->IgnoreParenImpCasts(); 10749 unsigned Pos; 10750 QualType ParmTy; 10751 if (isa<CXXThisExpr>(E)) { 10752 Pos = ParamPositions[FD]; 10753 ParmTy = E->getType(); 10754 } else { 10755 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10756 ->getCanonicalDecl(); 10757 Pos = ParamPositions[PVD]; 10758 ParmTy = PVD->getType(); 10759 } 10760 ParamAttrs[Pos].Alignment = 10761 (*NI) 10762 ? (*NI)->EvaluateKnownConstInt(C) 10763 : llvm::APSInt::getUnsigned( 10764 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10765 .getQuantity()); 10766 ++NI; 10767 } 10768 // Mark linear parameters. 10769 auto SI = Attr->steps_begin(); 10770 auto MI = Attr->modifiers_begin(); 10771 for (const Expr *E : Attr->linears()) { 10772 E = E->IgnoreParenImpCasts(); 10773 unsigned Pos; 10774 if (isa<CXXThisExpr>(E)) { 10775 Pos = ParamPositions[FD]; 10776 } else { 10777 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10778 ->getCanonicalDecl(); 10779 Pos = ParamPositions[PVD]; 10780 } 10781 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10782 ParamAttr.Kind = Linear; 10783 if (*SI) { 10784 Expr::EvalResult Result; 10785 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10786 if (const auto *DRE = 10787 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10788 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10789 ParamAttr.Kind = LinearWithVarStride; 10790 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10791 ParamPositions[StridePVD->getCanonicalDecl()]); 10792 } 10793 } 10794 } else { 10795 ParamAttr.StrideOrArg = Result.Val.getInt(); 10796 } 10797 } 10798 ++SI; 10799 ++MI; 10800 } 10801 llvm::APSInt VLENVal; 10802 SourceLocation ExprLoc; 10803 const Expr *VLENExpr = Attr->getSimdlen(); 10804 if (VLENExpr) { 10805 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10806 ExprLoc = VLENExpr->getExprLoc(); 10807 } 10808 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10809 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10810 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10811 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10812 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10813 unsigned VLEN = VLENVal.getExtValue(); 10814 StringRef MangledName = Fn->getName(); 10815 if (CGM.getTarget().hasFeature("sve")) 10816 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10817 MangledName, 's', 128, Fn, ExprLoc); 10818 if (CGM.getTarget().hasFeature("neon")) 10819 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10820 MangledName, 'n', 128, Fn, ExprLoc); 10821 } 10822 } 10823 FD = FD->getPreviousDecl(); 10824 } 10825 } 10826 10827 namespace { 10828 /// Cleanup action for doacross support. 10829 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10830 public: 10831 static const int DoacrossFinArgs = 2; 10832 10833 private: 10834 llvm::FunctionCallee RTLFn; 10835 llvm::Value *Args[DoacrossFinArgs]; 10836 10837 public: 10838 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10839 ArrayRef<llvm::Value *> CallArgs) 10840 : RTLFn(RTLFn) { 10841 assert(CallArgs.size() == DoacrossFinArgs); 10842 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10843 } 10844 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10845 if (!CGF.HaveInsertPoint()) 10846 return; 10847 CGF.EmitRuntimeCall(RTLFn, Args); 10848 } 10849 }; 10850 } // namespace 10851 10852 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10853 const OMPLoopDirective &D, 10854 ArrayRef<Expr *> NumIterations) { 10855 if (!CGF.HaveInsertPoint()) 10856 return; 10857 10858 ASTContext &C = CGM.getContext(); 10859 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10860 RecordDecl *RD; 10861 if (KmpDimTy.isNull()) { 10862 // Build struct kmp_dim { // loop bounds info casted to 
kmp_int64 10863 // kmp_int64 lo; // lower 10864 // kmp_int64 up; // upper 10865 // kmp_int64 st; // stride 10866 // }; 10867 RD = C.buildImplicitRecord("kmp_dim"); 10868 RD->startDefinition(); 10869 addFieldToRecordDecl(C, RD, Int64Ty); 10870 addFieldToRecordDecl(C, RD, Int64Ty); 10871 addFieldToRecordDecl(C, RD, Int64Ty); 10872 RD->completeDefinition(); 10873 KmpDimTy = C.getRecordType(RD); 10874 } else { 10875 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10876 } 10877 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10878 QualType ArrayTy = 10879 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); 10880 10881 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10882 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10883 enum { LowerFD = 0, UpperFD, StrideFD }; 10884 // Fill dims with data. 10885 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10886 LValue DimsLVal = CGF.MakeAddrLValue( 10887 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10888 // dims.upper = num_iterations; 10889 LValue UpperLVal = CGF.EmitLValueForField( 10890 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10891 llvm::Value *NumIterVal = 10892 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10893 D.getNumIterations()->getType(), Int64Ty, 10894 D.getNumIterations()->getExprLoc()); 10895 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10896 // dims.stride = 1; 10897 LValue StrideLVal = CGF.EmitLValueForField( 10898 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10899 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10900 StrideLVal); 10901 } 10902 10903 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10904 // kmp_int32 num_dims, struct kmp_dim * dims); 10905 llvm::Value *Args[] = { 10906 emitUpdateLocation(CGF, D.getBeginLoc()), 10907 getThreadID(CGF, D.getBeginLoc()), 10908 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10909 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10910 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10911 CGM.VoidPtrTy)}; 10912 10913 llvm::FunctionCallee RTLFn = 10914 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10915 CGF.EmitRuntimeCall(RTLFn, Args); 10916 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10917 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10918 llvm::FunctionCallee FiniRTLFn = 10919 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10920 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10921 llvm::makeArrayRef(FiniArgs)); 10922 } 10923 10924 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10925 const OMPDependClause *C) { 10926 QualType Int64Ty = 10927 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10928 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10929 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10930 Int64Ty, Size, ArrayType::Normal, 0); 10931 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10932 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10933 const Expr *CounterVal = C->getLoopData(I); 10934 assert(CounterVal); 10935 llvm::Value *CntVal = CGF.EmitScalarConversion( 10936 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10937 CounterVal->getExprLoc()); 10938 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10939 /*Volatile=*/false, Int64Ty); 10940 } 10941 llvm::Value *Args[] = { 10942 emitUpdateLocation(CGF, C->getBeginLoc()), 10943 getThreadID(CGF, C->getBeginLoc()), 10944 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10945 llvm::FunctionCallee RTLFn; 10946 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10947 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10948 } else { 10949 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10950 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10951 } 
10952 CGF.EmitRuntimeCall(RTLFn, Args); 10953 } 10954 10955 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10956 llvm::FunctionCallee Callee, 10957 ArrayRef<llvm::Value *> Args) const { 10958 assert(Loc.isValid() && "Outlined function call location must be valid."); 10959 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10960 10961 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10962 if (Fn->doesNotThrow()) { 10963 CGF.EmitNounwindRuntimeCall(Fn, Args); 10964 return; 10965 } 10966 } 10967 CGF.EmitRuntimeCall(Callee, Args); 10968 } 10969 10970 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10971 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10972 ArrayRef<llvm::Value *> Args) const { 10973 emitCall(CGF, Loc, OutlinedFn, Args); 10974 } 10975 10976 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 10977 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 10978 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 10979 HasEmittedDeclareTargetRegion = true; 10980 } 10981 10982 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10983 const VarDecl *NativeParam, 10984 const VarDecl *TargetParam) const { 10985 return CGF.GetAddrOfLocalVar(NativeParam); 10986 } 10987 10988 namespace { 10989 /// Cleanup action for allocate support. 
/// EH-scope cleanup that calls __kmpc_free for a variable allocated through
/// __kmpc_alloc (OpenMP 'allocate' support).
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  // (thread id, address, allocator) captured at allocation time.
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// For a local variable carrying an OMPAllocateDeclAttr, allocate its storage
/// via __kmpc_alloc with the requested allocator and register a matching
/// __kmpc_free cleanup; returns Address::invalid() when default (stack)
/// allocation should be used instead.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: size is only known at runtime.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  // Free on both normal and exceptional scope exit.
  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // __kmpc_alloc returns void*; cast to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}

// CGOpenMPSIMDRuntime: the -fopenmp-simd runtime. Only 'simd' constructs are
// supported, so every other entry point below is intentionally unreachable.

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// CGOpenMPSIMDRuntime stubs, continued: worksharing-loop scheduling,
// threadprivate, tasking, cancellation, target offloading and doacross
// support all require the full OpenMP runtime, so in SIMD-only mode these
// hooks are unreachable. The few non-trapping overrides below are noted
// individually.
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                            const VarDecl *VD,
                                            Address VDAddr,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Reductions CAN occur under 'simd'; only runtime-free "simple" reductions
// are expected here, and those are handled by the base-class implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Non-trapping override: no declaration is ever treated as a target global
// in SIMD-only mode, so simply report "not handled here".
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

// Non-trapping override: no offload entries exist, so there is no
// registration function to emit.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}