1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGCXXABI.h" 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/CodeGen/ConstantInitBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/Basic/BitmaskEnum.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/Bitcode/BitcodeReader.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/Value.h" 27 #include "llvm/Support/Format.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 31 using namespace clang; 32 using namespace CodeGen; 33 34 namespace { 35 /// Base class for handling code generation inside OpenMP regions. 36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 37 public: 38 /// Kinds of OpenMP regions used in codegen. 39 enum CGOpenMPRegionKind { 40 /// Region with outlined function for standalone 'parallel' 41 /// directive. 42 ParallelOutlinedRegion, 43 /// Region with outlined function for standalone 'task' directive. 44 TaskOutlinedRegion, 45 /// Region for constructs that do not require function outlining, 46 /// like 'for', 'sections', 'atomic' etc. directives. 47 InlinedRegion, 48 /// Region with outlined function for standalone 'target' directive. 
49 TargetRegion, 50 }; 51 52 CGOpenMPRegionInfo(const CapturedStmt &CS, 53 const CGOpenMPRegionKind RegionKind, 54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 55 bool HasCancel) 56 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 57 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 58 59 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 63 Kind(Kind), HasCancel(HasCancel) {} 64 65 /// Get a variable or parameter for storing global thread id 66 /// inside OpenMP construct. 67 virtual const VarDecl *getThreadIDVariable() const = 0; 68 69 /// Emit the captured statement body. 70 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 71 72 /// Get an LValue for the current ThreadID variable. 73 /// \return LValue for thread id variable. This LValue always has type int32*. 74 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 75 76 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 77 78 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 79 80 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 81 82 bool hasCancel() const { return HasCancel; } 83 84 static bool classof(const CGCapturedStmtInfo *Info) { 85 return Info->getKind() == CR_OpenMP; 86 } 87 88 ~CGOpenMPRegionInfo() override = default; 89 90 protected: 91 CGOpenMPRegionKind RegionKind; 92 RegionCodeGenTy CodeGen; 93 OpenMPDirectiveKind Kind; 94 bool HasCancel; 95 }; 96 97 /// API for captured statement code generation in OpenMP constructs. 
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined capture helper function.
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action that emits the switch-based dispatch required for untied
  /// tasks: each scheduling point becomes a switch case the task can
  /// resume from.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    /// Parameter holding the task part id (the resume point index).
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Part id 0 resumes at the very beginning of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        // Store the index of the next resume point, then yield back to the
        // runtime; the new switch case re-enters the task right after this
        // scheduling point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of scheduling points emitted so far (== switch case count).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// Same pointer as OldCSI if it is an OpenMP region info, else null.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 
346 for (const auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate( 359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 360 } 361 (void)PrivScope.Privatize(); 362 } 363 364 /// Lookup the captured field decl for a variable. 365 const FieldDecl *lookup(const VarDecl *VD) const override { 366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 367 return FD; 368 return nullptr; 369 } 370 371 /// Emit the captured statement body. 372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 373 llvm_unreachable("No body for expressions"); 374 } 375 376 /// Get a variable or parameter for storing global thread id 377 /// inside OpenMP construct. 378 const VarDecl *getThreadIDVariable() const override { 379 llvm_unreachable("No thread id for expressions"); 380 } 381 382 /// Get the name of the capture helper. 383 StringRef getHelperName() const override { 384 llvm_unreachable("No helper name for expressions"); 385 } 386 387 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 388 389 private: 390 /// Private scope to capture global variables. 391 CodeGenFunction::OMPPrivateScope PrivScope; 392 }; 393 394 /// RAII for emitting code of OpenMP constructs. 395 class InlinedOpenMPRegionRAII { 396 CodeGenFunction &CGF; 397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 398 FieldDecl *LambdaThisCaptureField = nullptr; 399 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 400 401 public: 402 /// Constructs region for combined constructs. 403 /// \param CodeGen Code generation sequence for combined directives. 
Includes 404 /// a list of functions used for code generation of implicitly inlined 405 /// regions. 406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 407 OpenMPDirectiveKind Kind, bool HasCancel) 408 : CGF(CGF) { 409 // Start emission for the construct. 410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 414 CGF.LambdaThisCaptureField = nullptr; 415 BlockInfo = CGF.BlockInfo; 416 CGF.BlockInfo = nullptr; 417 } 418 419 ~InlinedOpenMPRegionRAII() { 420 // Restore original CapturedStmtInfo only if we're done with code emission. 421 auto *OldCSI = 422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 423 delete CGF.CapturedStmtInfo; 424 CGF.CapturedStmtInfo = OldCSI; 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 427 CGF.BlockInfo = BlockInfo; 428 } 429 }; 430 431 /// Values for bit flags used in the ident_t to describe the fields. 432 /// All enumeric elements are named and described in accordance with the code 433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 434 enum OpenMPLocationFlags : unsigned { 435 /// Use trampoline for internal microtask. 436 OMP_IDENT_IMD = 0x01, 437 /// Use c-style ident structure. 438 OMP_IDENT_KMPC = 0x02, 439 /// Atomic reduction option for kmpc_reduce. 440 OMP_ATOMIC_REDUCE = 0x10, 441 /// Explicit 'barrier' directive. 442 OMP_IDENT_BARRIER_EXPL = 0x20, 443 /// Implicit barrier in code. 444 OMP_IDENT_BARRIER_IMPL = 0x40, 445 /// Implicit barrier in 'for' directive. 446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 447 /// Implicit barrier in 'sections' directive. 448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 449 /// Implicit barrier in 'single' directive. 
450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 451 /// Call of __kmp_for_static_init for static loop. 452 OMP_IDENT_WORK_LOOP = 0x200, 453 /// Call of __kmp_for_static_init for sections. 454 OMP_IDENT_WORK_SECTIONS = 0x400, 455 /// Call of __kmp_for_static_init for distribute. 456 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 458 }; 459 460 namespace { 461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 462 /// Values for bit flags for marking which requires clauses have been used. 463 enum OpenMPOffloadingRequiresDirFlags : int64_t { 464 /// flag undefined. 465 OMP_REQ_UNDEFINED = 0x000, 466 /// no requires clause present. 467 OMP_REQ_NONE = 0x001, 468 /// reverse_offload clause. 469 OMP_REQ_REVERSE_OFFLOAD = 0x002, 470 /// unified_address clause. 471 OMP_REQ_UNIFIED_ADDRESS = 0x004, 472 /// unified_shared_memory clause. 473 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 474 /// dynamic_allocators clause. 475 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 476 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 477 }; 478 479 enum OpenMPOffloadingReservedDeviceIDs { 480 /// Device ID if the device was not defined, runtime should get it 481 /// from environment variables in the spec. 482 OMP_DEVICEID_UNDEF = -1, 483 }; 484 } // anonymous namespace 485 486 /// Describes ident structure that describes a source location. 
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the KMPC/libomptarget runtime entry points emitted by
/// this class; each enumerator documents the C signature of the callee.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  /// Action whose Exit callback runs when the cleanup is emitted.
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to do if codegen already lost its insertion point.
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    // Register the Exit action as a cleanup so it runs even on EH paths.
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // The UDR call is modeled as CallExpr(OpaqueValueExpr -> DeclRefExpr ->
  // OMPDeclareReductionDecl); peel each layer and bail out on any mismatch.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit initialization of \p Private using either the UDR initializer
/// \p InitOp (when \p DRD provides one) or a zero/default value of type
/// \p Ty; \p Original is the original shared variable (the "omp_orig"
/// operand of the initializer).
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Map omp_priv/omp_orig to Private/Original and emit the initializer
    // call through the opaque callee.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No user-defined initializer: store a null constant of the right type
    // in a private global and copy it into the private variable.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes track the current source (UDR only) and destination elements
  // across loop iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Per-element cleanups must run before advancing to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" but this advances the
    // *source* pointer; cosmetic only (IR names carry no semantics), kept to
    // avoid perturbing FileCheck-based tests.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue for the shared (original) copy of reduction item \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit the upper-bound lvalue of an array-section reduction item; returns
/// an empty LValue for any non-section expression.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Emit initialization of the N-th private aggregate reduction item at
/// \p PrivateAddr from the shared original at \p SharedLVal.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the UDR initializer when one exists, or when the private var has no
  // default init of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ?
                           ClausesData[N].ReductionOp
                                                 : PrivateVD->getInit(),
                       DRD, SharedLVal.getAddress());
}

/// Collect per-clause data for all reduction items. The three arrays are
/// parallel: Shareds[i], Privates[i] and ReductionOps[i] describe item i.
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  auto IPriv = Privates.begin();
  auto IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IPriv, *IRed);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

/// Emit (and cache) the shared lvalue pair (begin, section upper bound) for
/// reduction item N. Must be called with N == number of items emitted so far.
void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
  SharedAddresses.emplace_back(First, Second);
}

/// Compute and cache the size (in chars and in elements) of reduction item N,
/// emitting the variably-modified private type when needed.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-size item: size is known statically, element count unused.
    Sizes.emplace_back(
        CGF.getTypeSize(
            SharedAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; chars = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
                                     SharedAddresses[N].first.getPointer());
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // VLA: total size is known; derive the element count from it.
    SizeInChars = CGF.getTypeSize(
        SharedAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed element count so the
  // variably-modified type can be emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Re-emit the variably-modified private type of item N with an externally
/// provided element count \p Size (e.g. loaded from a task reduction record).
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

/// Emit initialization of the N-th private reduction copy, choosing between
/// aggregate init, UDR initializer, or the private variable's default init.
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  QualType SharedType = SharedAddresses[N].first.getType();
  SharedLVal = CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
                                       CGF.ConvertTypeForMem(SharedType)),
      SharedType, SharedAddresses[N].first.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined reduction initializer.
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedLVal.getAddress(),
                                     SharedLVal.getType());
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private variable's own (non-trivial) initializer if
    // the caller-provided DefaultInit callback did not handle it.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Return true if the private copy of reduction item N requires destruction.
bool ReductionCodeGen::needCleanups(unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

/// Push a destructor cleanup for the private copy of item N, if needed.
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

/// Dereference pointers/references in \p BaseLV until its type matches
/// \p ElTy, then return the result cast to ElTy's memory representation.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

/// Rebuild the chain of pointer temporaries that \p loadToBegin peeled off,
/// storing \p Addr at the innermost level, and return the outermost address
/// (or \p Addr itself if no indirection was involved).
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    // One temporary per level of indirection; each outer temp points at the
    // next inner one.
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  return Address(Addr, BaseLVAlignment);
}

/// Strip array sections/subscripts off \p Ref to find the underlying base
/// variable; \p DE receives the DeclRefExpr of that base. Returns nullptr
/// (leaving DE unset) when Ref is not a section/subscript expression.
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

/// For section/subscript reduction items, rebase \p PrivateAddr so that it
/// mirrors the offset of the reduced subobject within its base variable;
/// otherwise return PrivateAddr unchanged. Records the base decl either way.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    // Offset of the reduced section start from the base of the variable.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(),
            SharedAddresses[N].first.getAddress().getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress().getType(),
                      OriginalBaseLValue.getAlignment(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

/// True if reduction item N is initialized via a UDR initializer clause.
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue
CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  // In outlined regions the thread id variable is a kmp_int32* parameter;
  // load through it to get an lvalue of the pointee.
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  // For tasks the thread id variable is passed by value, not by pointer.
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Append an implicit public, non-mutable field of type \p FieldTy to the
/// record/context \p DC and return the created FieldDecl.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  // Build the ident_t record used for source-location arguments to the
  // OpenMP runtime; field layout must match the runtime's kmp.h.
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    // Keep definitions and anything still referenced.
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

/// Join \p Parts into a runtime entity name, using FirstSeparator before the
/// first part and Separator between the rest.
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return OS.str();
}

/// Emit the '.omp_combiner.' or '.omp_initializer.' helper for a UDR:
///   void helper(Ty *omp_out/omp_priv, Ty *omp_in/omp_orig);
/// \p CombinerInitializer is the expression to emit as the body (may be null
/// for direct-init initializers, where omp_priv's own init is emitted
/// instead).
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force-inline them in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    // Direct-init initializer: emit omp_priv's own initializer.
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit (once) the combiner and optional initializer functions for the
/// user-defined reduction \p D and cache them in UDRMap.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
return; 1349 llvm::Function *Combiner = emitCombinerOrInitializer( 1350 CGM, D->getType(), D->getCombiner(), 1351 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1352 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1353 /*IsCombiner=*/true); 1354 llvm::Function *Initializer = nullptr; 1355 if (const Expr *Init = D->getInitializer()) { 1356 Initializer = emitCombinerOrInitializer( 1357 CGM, D->getType(), 1358 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1359 : nullptr, 1360 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1361 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1362 /*IsCombiner=*/false); 1363 } 1364 UDRMap.try_emplace(D, Combiner, Initializer); 1365 if (CGF) { 1366 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1367 Decls.second.push_back(D); 1368 } 1369 } 1370 1371 std::pair<llvm::Function *, llvm::Function *> 1372 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1373 auto I = UDRMap.find(D); 1374 if (I != UDRMap.end()) 1375 return I->second; 1376 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1377 return UDRMap.lookup(D); 1378 } 1379 1380 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1381 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1382 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1383 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1384 assert(ThreadIDVar->getType()->isPointerType() && 1385 "thread id variable must be of type kmp_int32 *"); 1386 CodeGenFunction CGF(CGM, true); 1387 bool HasCancel = false; 1388 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1389 HasCancel = OPD->hasCancel(); 1390 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1391 HasCancel = OPSD->hasCancel(); 1392 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1393 HasCancel = OPFD->hasCancel(); 1394 else if (const 
auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1395 HasCancel = OPFD->hasCancel(); 1396 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1397 HasCancel = OPFD->hasCancel(); 1398 else if (const auto *OPFD = 1399 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1400 HasCancel = OPFD->hasCancel(); 1401 else if (const auto *OPFD = 1402 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1403 HasCancel = OPFD->hasCancel(); 1404 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1405 HasCancel, OutlinedHelperName); 1406 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1407 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1408 } 1409 1410 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1411 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1412 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1413 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1414 return emitParallelOrTeamsOutlinedFunction( 1415 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1416 } 1417 1418 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1419 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1420 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1421 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1422 return emitParallelOrTeamsOutlinedFunction( 1423 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1424 } 1425 1426 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1427 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1428 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1429 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1430 bool Tied, unsigned &NumberOfParts) { 1431 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1432 PrePostActionTy &) { 1433 llvm::Value *ThreadID = getThreadID(CGF, 
D.getBeginLoc()); 1434 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1435 llvm::Value *TaskArgs[] = { 1436 UpLoc, ThreadID, 1437 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1438 TaskTVar->getType()->castAs<PointerType>()) 1439 .getPointer()}; 1440 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1441 }; 1442 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1443 UntiedCodeGen); 1444 CodeGen.setAction(Action); 1445 assert(!ThreadIDVar->getType()->isPointerType() && 1446 "thread id variable must be of type kmp_int32 for tasks"); 1447 const OpenMPDirectiveKind Region = 1448 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1449 : OMPD_task; 1450 const CapturedStmt *CS = D.getCapturedStmt(Region); 1451 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1452 CodeGenFunction CGF(CGM, true); 1453 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1454 InnermostKind, 1455 TD ? TD->hasCancel() : false, Action); 1456 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1457 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1458 if (!Tied) 1459 NumberOfParts = Action.getNumberOfParts(); 1460 return Res; 1461 } 1462 1463 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1464 const RecordDecl *RD, const CGRecordLayout &RL, 1465 ArrayRef<llvm::Constant *> Data) { 1466 llvm::StructType *StructTy = RL.getLLVMType(); 1467 unsigned PrevIdx = 0; 1468 ConstantInitBuilder CIBuilder(CGM); 1469 auto DI = Data.begin(); 1470 for (const FieldDecl *FD : RD->fields()) { 1471 unsigned Idx = RL.getLLVMFieldNo(FD); 1472 // Fill the alignment. 1473 for (unsigned I = PrevIdx; I < Idx; ++I) 1474 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1475 PrevIdx = Idx + 1; 1476 Fields.add(*DI); 1477 ++DI; 1478 } 1479 } 1480 1481 template <class... 
As>
/// Create a private global holding \p Data laid out as record type \p Ty;
/// extra \p Args are forwarded to ConstantStructBuilder::finishAndCreateGlobal
/// (e.g. linkage).
static llvm::GlobalVariable *
createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
                   ArrayRef<llvm::Constant *> Data, const Twine &Name,
                   As &&... Args) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantInitBuilder CIBuilder(CGM);
  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  return Fields.finishAndCreateGlobal(
      Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
      std::forward<As>(Args)...);
}

/// Build a constant struct of record type \p Ty from \p Data and append it
/// to an enclosing aggregate builder \p Parent.
template <typename T>
static void
createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
                                         ArrayRef<llvm::Constant *> Data,
                                         T &Parent) {
  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
  buildStructValue(Fields, CGM, RD, RL, Data);
  Fields.finishAndAddTo(Parent);
}

/// Return (creating and caching on first use) the default ident_t global for
/// the given flags, keyed by (Flags, Reserved2Flags).
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  FlagsTy FlagsKey(Flags, Reserved2Flags);
  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
  if (!Entry) {
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field order must match the ident_t record built in the constructor:
    // reserved_1, flags, reserved_2, reserved_3, psource.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Install the per-function "service" insert point used for hoisted runtime
/// setup (thread-id loads, location copies): either at the current insert
/// point or right after the alloca insertion point.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  // A no-op bitcast of undef serves as a stable, removable marker
  // instruction.
  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Remove the service insert-point marker for \p CGF's function, if any.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

/// Emit (or reuse) the ident_t* source-location argument for a runtime call
/// at \p Loc, updating its psource field when debug info is enabled.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // GetOpenMPThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Copy the default ident_t once at the service insert point; only the
    // psource field is rewritten per call site below.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc =
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1612 OS2 << ";" << PLoc.getFilename() << ";"; 1613 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1614 OS2 << FD->getQualifiedNameAsString(); 1615 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1616 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1617 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1618 } 1619 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1620 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1621 1622 // Our callers always pass this to a runtime function, so for 1623 // convenience, go ahead and return a naked pointer. 1624 return LocValue.getPointer(); 1625 } 1626 1627 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1628 SourceLocation Loc) { 1629 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1630 1631 llvm::Value *ThreadID = nullptr; 1632 // Check whether we've already cached a load of the thread id in this 1633 // function. 1634 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1635 if (I != OpenMPLocThreadIDMap.end()) { 1636 ThreadID = I->second.ThreadID; 1637 if (ThreadID != nullptr) 1638 return ThreadID; 1639 } 1640 // If exceptions are enabled, do not use parameter to avoid possible crash. 1641 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1642 !CGF.getLangOpts().CXXExceptions || 1643 CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1644 if (auto *OMPRegionInfo = 1645 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1646 if (OMPRegionInfo->getThreadIDVariable()) { 1647 // Check if this an outlined function with thread id passed as argument. 1648 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1649 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1650 // If value loaded in entry block, cache it and use it everywhere in 1651 // function. 
1652 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1653 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1654 Elem.second.ThreadID = ThreadID; 1655 } 1656 return ThreadID; 1657 } 1658 } 1659 } 1660 1661 // This is not an outlined function region - need to call __kmpc_int32 1662 // kmpc_global_thread_num(ident_t *loc). 1663 // Generate thread id value and cache this value for use across the 1664 // function. 1665 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1666 if (!Elem.second.ServiceInsertPt) 1667 setLocThreadIdInsertPt(CGF); 1668 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1669 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1670 llvm::CallInst *Call = CGF.Builder.CreateCall( 1671 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1672 emitUpdateLocation(CGF, Loc)); 1673 Call->setCallingConv(CGF.getRuntimeCC()); 1674 Elem.second.ThreadID = Call; 1675 return Call; 1676 } 1677 1678 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1679 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1680 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1681 clearLocThreadIdInsertPt(CGF); 1682 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1683 } 1684 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1685 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1686 UDRMap.erase(D); 1687 FunctionUDRMap.erase(CGF.CurFn); 1688 } 1689 } 1690 1691 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1692 return IdentTy->getPointerTo(); 1693 } 1694 1695 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1696 if (!Kmpc_MicroTy) { 1697 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1698 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1699 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1700 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1701 } 1702 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1703 } 1704 1705 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1706 llvm::FunctionCallee RTLFn = nullptr; 1707 switch (static_cast<OpenMPRTLFunction>(Function)) { 1708 case OMPRTL__kmpc_fork_call: { 1709 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1710 // microtask, ...); 1711 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1712 getKmpc_MicroPointerTy()}; 1713 auto *FnTy = 1714 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1715 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1716 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1717 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1718 llvm::LLVMContext &Ctx = F->getContext(); 1719 llvm::MDBuilder MDB(Ctx); 1720 // Annotate the callback behavior of the __kmpc_fork_call: 1721 // - The callback callee is argument number 2 (microtask). 1722 // - The first two arguments of the callback callee are unknown (-1). 1723 // - All variadic arguments to the __kmpc_fork_call are passed to the 1724 // callback callee. 
1725 F->addMetadata( 1726 llvm::LLVMContext::MD_callback, 1727 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1728 2, {-1, -1}, 1729 /* VarArgsArePassed */ true)})); 1730 } 1731 } 1732 break; 1733 } 1734 case OMPRTL__kmpc_global_thread_num: { 1735 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1736 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1737 auto *FnTy = 1738 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1739 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1740 break; 1741 } 1742 case OMPRTL__kmpc_threadprivate_cached: { 1743 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1744 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1745 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1746 CGM.VoidPtrTy, CGM.SizeTy, 1747 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1748 auto *FnTy = 1749 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1750 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1751 break; 1752 } 1753 case OMPRTL__kmpc_critical: { 1754 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1755 // kmp_critical_name *crit); 1756 llvm::Type *TypeParams[] = { 1757 getIdentTyPointerTy(), CGM.Int32Ty, 1758 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1759 auto *FnTy = 1760 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1761 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1762 break; 1763 } 1764 case OMPRTL__kmpc_critical_with_hint: { 1765 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1766 // kmp_critical_name *crit, uintptr_t hint); 1767 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1768 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1769 CGM.IntPtrTy}; 1770 auto *FnTy = 1771 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1772 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1773 break; 1774 } 1775 case OMPRTL__kmpc_threadprivate_register: { 1776 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1777 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1778 // typedef void *(*kmpc_ctor)(void *); 1779 auto *KmpcCtorTy = 1780 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1781 /*isVarArg*/ false)->getPointerTo(); 1782 // typedef void *(*kmpc_cctor)(void *, void *); 1783 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1784 auto *KmpcCopyCtorTy = 1785 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1786 /*isVarArg*/ false) 1787 ->getPointerTo(); 1788 // typedef void (*kmpc_dtor)(void *); 1789 auto *KmpcDtorTy = 1790 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1791 ->getPointerTo(); 1792 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1793 KmpcCopyCtorTy, KmpcDtorTy}; 1794 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1795 /*isVarArg*/ false); 1796 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1797 break; 1798 } 1799 case OMPRTL__kmpc_end_critical: { 1800 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1801 // kmp_critical_name *crit); 1802 llvm::Type *TypeParams[] = { 1803 getIdentTyPointerTy(), CGM.Int32Ty, 1804 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1805 auto *FnTy = 1806 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1807 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1808 break; 1809 } 1810 case OMPRTL__kmpc_cancel_barrier: { 1811 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1812 // global_tid); 1813 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1814 auto *FnTy = 1815 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1816 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1817 break; 1818 } 1819 case 
OMPRTL__kmpc_barrier: { 1820 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1821 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1822 auto *FnTy = 1823 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1824 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1825 break; 1826 } 1827 case OMPRTL__kmpc_for_static_fini: { 1828 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1829 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1830 auto *FnTy = 1831 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1832 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1833 break; 1834 } 1835 case OMPRTL__kmpc_push_num_threads: { 1836 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1837 // kmp_int32 num_threads) 1838 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1839 CGM.Int32Ty}; 1840 auto *FnTy = 1841 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1842 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1843 break; 1844 } 1845 case OMPRTL__kmpc_serialized_parallel: { 1846 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1847 // global_tid); 1848 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1849 auto *FnTy = 1850 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1851 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1852 break; 1853 } 1854 case OMPRTL__kmpc_end_serialized_parallel: { 1855 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1856 // global_tid); 1857 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1858 auto *FnTy = 1859 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1860 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1861 break; 1862 } 1863 case OMPRTL__kmpc_flush: { 1864 // Build void 
__kmpc_flush(ident_t *loc); 1865 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1866 auto *FnTy = 1867 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1868 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1869 break; 1870 } 1871 case OMPRTL__kmpc_master: { 1872 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1873 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1874 auto *FnTy = 1875 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1876 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1877 break; 1878 } 1879 case OMPRTL__kmpc_end_master: { 1880 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1881 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1882 auto *FnTy = 1883 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1884 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1885 break; 1886 } 1887 case OMPRTL__kmpc_omp_taskyield: { 1888 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1889 // int end_part); 1890 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1891 auto *FnTy = 1892 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1893 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1894 break; 1895 } 1896 case OMPRTL__kmpc_single: { 1897 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1898 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1899 auto *FnTy = 1900 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1901 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1902 break; 1903 } 1904 case OMPRTL__kmpc_end_single: { 1905 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1906 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1907 auto *FnTy = 1908 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1909 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1910 break; 1911 } 1912 case OMPRTL__kmpc_omp_task_alloc: { 1913 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1914 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1915 // kmp_routine_entry_t *task_entry); 1916 assert(KmpRoutineEntryPtrTy != nullptr && 1917 "Type kmp_routine_entry_t must be created."); 1918 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1919 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1920 // Return void * and then cast to particular kmp_task_t type. 1921 auto *FnTy = 1922 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1923 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1924 break; 1925 } 1926 case OMPRTL__kmpc_omp_target_task_alloc: { 1927 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 1928 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1929 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 1930 assert(KmpRoutineEntryPtrTy != nullptr && 1931 "Type kmp_routine_entry_t must be created."); 1932 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1933 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 1934 CGM.Int64Ty}; 1935 // Return void * and then cast to particular kmp_task_t type. 
1936 auto *FnTy = 1937 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1938 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 1939 break; 1940 } 1941 case OMPRTL__kmpc_omp_task: { 1942 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1943 // *new_task); 1944 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1945 CGM.VoidPtrTy}; 1946 auto *FnTy = 1947 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1948 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1949 break; 1950 } 1951 case OMPRTL__kmpc_copyprivate: { 1952 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1953 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1954 // kmp_int32 didit); 1955 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1956 auto *CpyFnTy = 1957 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1958 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1959 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1960 CGM.Int32Ty}; 1961 auto *FnTy = 1962 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1963 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1964 break; 1965 } 1966 case OMPRTL__kmpc_reduce: { 1967 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1968 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1969 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1970 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1971 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1972 /*isVarArg=*/false); 1973 llvm::Type *TypeParams[] = { 1974 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1975 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1976 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1977 auto *FnTy = 1978 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1979 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1980 break; 1981 } 1982 case OMPRTL__kmpc_reduce_nowait: { 1983 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1984 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1985 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1986 // *lck); 1987 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1988 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1989 /*isVarArg=*/false); 1990 llvm::Type *TypeParams[] = { 1991 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1992 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1993 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1994 auto *FnTy = 1995 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1996 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 1997 break; 1998 } 1999 case OMPRTL__kmpc_end_reduce: { 2000 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2001 // kmp_critical_name *lck); 2002 llvm::Type *TypeParams[] = { 2003 getIdentTyPointerTy(), CGM.Int32Ty, 2004 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2005 auto *FnTy = 2006 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2007 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2008 break; 2009 } 2010 case OMPRTL__kmpc_end_reduce_nowait: { 2011 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2012 // kmp_critical_name *lck); 2013 llvm::Type *TypeParams[] = { 2014 getIdentTyPointerTy(), CGM.Int32Ty, 2015 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2016 auto *FnTy = 2017 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2018 RTLFn = 2019 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2020 break; 2021 } 2022 case OMPRTL__kmpc_omp_task_begin_if0: { 
2023 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2024 // *new_task); 2025 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2026 CGM.VoidPtrTy}; 2027 auto *FnTy = 2028 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2029 RTLFn = 2030 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2031 break; 2032 } 2033 case OMPRTL__kmpc_omp_task_complete_if0: { 2034 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2035 // *new_task); 2036 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2037 CGM.VoidPtrTy}; 2038 auto *FnTy = 2039 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2040 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2041 /*Name=*/"__kmpc_omp_task_complete_if0"); 2042 break; 2043 } 2044 case OMPRTL__kmpc_ordered: { 2045 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2046 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2047 auto *FnTy = 2048 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2049 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2050 break; 2051 } 2052 case OMPRTL__kmpc_end_ordered: { 2053 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2054 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2055 auto *FnTy = 2056 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2057 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2058 break; 2059 } 2060 case OMPRTL__kmpc_omp_taskwait: { 2061 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2062 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2063 auto *FnTy = 2064 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2065 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2066 break; 2067 } 2068 case OMPRTL__kmpc_taskgroup: { 2069 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2070 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2071 auto *FnTy = 2072 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2073 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2074 break; 2075 } 2076 case OMPRTL__kmpc_end_taskgroup: { 2077 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2078 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2079 auto *FnTy = 2080 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2081 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2082 break; 2083 } 2084 case OMPRTL__kmpc_push_proc_bind: { 2085 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2086 // int proc_bind) 2087 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2088 auto *FnTy = 2089 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2090 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2091 break; 2092 } 2093 case OMPRTL__kmpc_omp_task_with_deps: { 2094 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2095 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2096 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2097 llvm::Type *TypeParams[] = { 2098 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2099 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2100 auto *FnTy = 2101 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2102 RTLFn = 2103 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2104 break; 2105 } 2106 case OMPRTL__kmpc_omp_wait_deps: { 2107 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2108 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2109 // kmp_depend_info_t *noalias_dep_list); 2110 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2111 CGM.Int32Ty, CGM.VoidPtrTy, 2112 
CGM.Int32Ty, CGM.VoidPtrTy}; 2113 auto *FnTy = 2114 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2115 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2116 break; 2117 } 2118 case OMPRTL__kmpc_cancellationpoint: { 2119 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2120 // global_tid, kmp_int32 cncl_kind) 2121 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2122 auto *FnTy = 2123 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2124 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2125 break; 2126 } 2127 case OMPRTL__kmpc_cancel: { 2128 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2129 // kmp_int32 cncl_kind) 2130 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2131 auto *FnTy = 2132 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2133 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2134 break; 2135 } 2136 case OMPRTL__kmpc_push_num_teams: { 2137 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2138 // kmp_int32 num_teams, kmp_int32 num_threads) 2139 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2140 CGM.Int32Ty}; 2141 auto *FnTy = 2142 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2143 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2144 break; 2145 } 2146 case OMPRTL__kmpc_fork_teams: { 2147 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2148 // microtask, ...); 2149 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2150 getKmpc_MicroPointerTy()}; 2151 auto *FnTy = 2152 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2153 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2154 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2155 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2156 llvm::LLVMContext &Ctx = F->getContext(); 2157 llvm::MDBuilder MDB(Ctx); 2158 // Annotate the callback behavior of the __kmpc_fork_teams: 2159 // - The callback callee is argument number 2 (microtask). 2160 // - The first two arguments of the callback callee are unknown (-1). 2161 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2162 // callback callee. 2163 F->addMetadata( 2164 llvm::LLVMContext::MD_callback, 2165 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2166 2, {-1, -1}, 2167 /* VarArgsArePassed */ true)})); 2168 } 2169 } 2170 break; 2171 } 2172 case OMPRTL__kmpc_taskloop: { 2173 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2174 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2175 // sched, kmp_uint64 grainsize, void *task_dup); 2176 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2177 CGM.IntTy, 2178 CGM.VoidPtrTy, 2179 CGM.IntTy, 2180 CGM.Int64Ty->getPointerTo(), 2181 CGM.Int64Ty->getPointerTo(), 2182 CGM.Int64Ty, 2183 CGM.IntTy, 2184 CGM.IntTy, 2185 CGM.Int64Ty, 2186 CGM.VoidPtrTy}; 2187 auto *FnTy = 2188 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2189 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2190 break; 2191 } 2192 case OMPRTL__kmpc_doacross_init: { 2193 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2194 // num_dims, struct kmp_dim *dims); 2195 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2196 CGM.Int32Ty, 2197 CGM.Int32Ty, 2198 CGM.VoidPtrTy}; 2199 auto *FnTy = 2200 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2201 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2202 break; 2203 } 2204 case OMPRTL__kmpc_doacross_fini: { 2205 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2206 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2207 auto *FnTy 
= 2208 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2209 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2210 break; 2211 } 2212 case OMPRTL__kmpc_doacross_post: { 2213 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2214 // *vec); 2215 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2216 CGM.Int64Ty->getPointerTo()}; 2217 auto *FnTy = 2218 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2219 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2220 break; 2221 } 2222 case OMPRTL__kmpc_doacross_wait: { 2223 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2224 // *vec); 2225 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2226 CGM.Int64Ty->getPointerTo()}; 2227 auto *FnTy = 2228 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2229 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2230 break; 2231 } 2232 case OMPRTL__kmpc_task_reduction_init: { 2233 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2234 // *data); 2235 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2236 auto *FnTy = 2237 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2238 RTLFn = 2239 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2240 break; 2241 } 2242 case OMPRTL__kmpc_task_reduction_get_th_data: { 2243 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2244 // *d); 2245 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2246 auto *FnTy = 2247 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2248 RTLFn = CGM.CreateRuntimeFunction( 2249 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2250 break; 2251 } 2252 case OMPRTL__kmpc_alloc: { 2253 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2254 // 
al); omp_allocator_handle_t type is void *. 2255 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2256 auto *FnTy = 2257 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2258 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2259 break; 2260 } 2261 case OMPRTL__kmpc_free: { 2262 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2263 // al); omp_allocator_handle_t type is void *. 2264 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2265 auto *FnTy = 2266 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2267 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2268 break; 2269 } 2270 case OMPRTL__kmpc_push_target_tripcount: { 2271 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2272 // size); 2273 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2274 llvm::FunctionType *FnTy = 2275 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2276 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2277 break; 2278 } 2279 case OMPRTL__tgt_target: { 2280 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2281 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2282 // *arg_types); 2283 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2284 CGM.VoidPtrTy, 2285 CGM.Int32Ty, 2286 CGM.VoidPtrPtrTy, 2287 CGM.VoidPtrPtrTy, 2288 CGM.Int64Ty->getPointerTo(), 2289 CGM.Int64Ty->getPointerTo()}; 2290 auto *FnTy = 2291 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2292 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2293 break; 2294 } 2295 case OMPRTL__tgt_target_nowait: { 2296 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2297 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2298 // int64_t *arg_types); 2299 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2300 CGM.VoidPtrTy, 2301 
CGM.Int32Ty, 2302 CGM.VoidPtrPtrTy, 2303 CGM.VoidPtrPtrTy, 2304 CGM.Int64Ty->getPointerTo(), 2305 CGM.Int64Ty->getPointerTo()}; 2306 auto *FnTy = 2307 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2308 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2309 break; 2310 } 2311 case OMPRTL__tgt_target_teams: { 2312 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2313 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2314 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2315 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2316 CGM.VoidPtrTy, 2317 CGM.Int32Ty, 2318 CGM.VoidPtrPtrTy, 2319 CGM.VoidPtrPtrTy, 2320 CGM.Int64Ty->getPointerTo(), 2321 CGM.Int64Ty->getPointerTo(), 2322 CGM.Int32Ty, 2323 CGM.Int32Ty}; 2324 auto *FnTy = 2325 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2326 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2327 break; 2328 } 2329 case OMPRTL__tgt_target_teams_nowait: { 2330 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2331 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2332 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2333 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2334 CGM.VoidPtrTy, 2335 CGM.Int32Ty, 2336 CGM.VoidPtrPtrTy, 2337 CGM.VoidPtrPtrTy, 2338 CGM.Int64Ty->getPointerTo(), 2339 CGM.Int64Ty->getPointerTo(), 2340 CGM.Int32Ty, 2341 CGM.Int32Ty}; 2342 auto *FnTy = 2343 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2344 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2345 break; 2346 } 2347 case OMPRTL__tgt_register_requires: { 2348 // Build void __tgt_register_requires(int64_t flags); 2349 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2350 auto *FnTy = 2351 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2352 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2353 
break; 2354 } 2355 case OMPRTL__tgt_register_lib: { 2356 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2357 QualType ParamTy = 2358 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2359 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2360 auto *FnTy = 2361 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2362 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2363 break; 2364 } 2365 case OMPRTL__tgt_unregister_lib: { 2366 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2367 QualType ParamTy = 2368 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2369 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2370 auto *FnTy = 2371 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2372 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2373 break; 2374 } 2375 case OMPRTL__tgt_target_data_begin: { 2376 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2377 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2378 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2379 CGM.Int32Ty, 2380 CGM.VoidPtrPtrTy, 2381 CGM.VoidPtrPtrTy, 2382 CGM.Int64Ty->getPointerTo(), 2383 CGM.Int64Ty->getPointerTo()}; 2384 auto *FnTy = 2385 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2386 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2387 break; 2388 } 2389 case OMPRTL__tgt_target_data_begin_nowait: { 2390 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2391 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2392 // *arg_types); 2393 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2394 CGM.Int32Ty, 2395 CGM.VoidPtrPtrTy, 2396 CGM.VoidPtrPtrTy, 2397 CGM.Int64Ty->getPointerTo(), 2398 CGM.Int64Ty->getPointerTo()}; 2399 auto *FnTy = 2400 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2401 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2402 break; 2403 } 2404 case OMPRTL__tgt_target_data_end: { 2405 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2406 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2407 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2408 CGM.Int32Ty, 2409 CGM.VoidPtrPtrTy, 2410 CGM.VoidPtrPtrTy, 2411 CGM.Int64Ty->getPointerTo(), 2412 CGM.Int64Ty->getPointerTo()}; 2413 auto *FnTy = 2414 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2415 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2416 break; 2417 } 2418 case OMPRTL__tgt_target_data_end_nowait: { 2419 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2420 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2421 // *arg_types); 2422 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2423 CGM.Int32Ty, 2424 CGM.VoidPtrPtrTy, 2425 CGM.VoidPtrPtrTy, 2426 CGM.Int64Ty->getPointerTo(), 2427 CGM.Int64Ty->getPointerTo()}; 2428 auto *FnTy = 2429 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2430 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2431 break; 2432 } 2433 case OMPRTL__tgt_target_data_update: { 2434 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2435 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2436 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2437 CGM.Int32Ty, 2438 CGM.VoidPtrPtrTy, 2439 CGM.VoidPtrPtrTy, 2440 CGM.Int64Ty->getPointerTo(), 2441 CGM.Int64Ty->getPointerTo()}; 2442 auto *FnTy = 2443 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2444 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2445 break; 2446 } 2447 case OMPRTL__tgt_target_data_update_nowait: { 2448 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2449 // arg_num, void** args_base, void **args, int64_t *arg_sizes, 
int64_t 2450 // *arg_types); 2451 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2452 CGM.Int32Ty, 2453 CGM.VoidPtrPtrTy, 2454 CGM.VoidPtrPtrTy, 2455 CGM.Int64Ty->getPointerTo(), 2456 CGM.Int64Ty->getPointerTo()}; 2457 auto *FnTy = 2458 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2459 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2460 break; 2461 } 2462 } 2463 assert(RTLFn && "Unable to find OpenMP runtime function"); 2464 return RTLFn; 2465 } 2466 2467 llvm::FunctionCallee 2468 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2469 assert((IVSize == 32 || IVSize == 64) && 2470 "IV size is not compatible with the omp runtime"); 2471 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2472 : "__kmpc_for_static_init_4u") 2473 : (IVSigned ? "__kmpc_for_static_init_8" 2474 : "__kmpc_for_static_init_8u"); 2475 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2476 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2477 llvm::Type *TypeParams[] = { 2478 getIdentTyPointerTy(), // loc 2479 CGM.Int32Ty, // tid 2480 CGM.Int32Ty, // schedtype 2481 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2482 PtrTy, // p_lower 2483 PtrTy, // p_upper 2484 PtrTy, // p_stride 2485 ITy, // incr 2486 ITy // chunk 2487 }; 2488 auto *FnTy = 2489 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2490 return CGM.CreateRuntimeFunction(FnTy, Name); 2491 } 2492 2493 llvm::FunctionCallee 2494 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2495 assert((IVSize == 32 || IVSize == 64) && 2496 "IV size is not compatible with the omp runtime"); 2497 StringRef Name = 2498 IVSize == 32 2499 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2500 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2501 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2502 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2503 CGM.Int32Ty, // tid 2504 CGM.Int32Ty, // schedtype 2505 ITy, // lower 2506 ITy, // upper 2507 ITy, // stride 2508 ITy // chunk 2509 }; 2510 auto *FnTy = 2511 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2512 return CGM.CreateRuntimeFunction(FnTy, Name); 2513 } 2514 2515 llvm::FunctionCallee 2516 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2517 assert((IVSize == 32 || IVSize == 64) && 2518 "IV size is not compatible with the omp runtime"); 2519 StringRef Name = 2520 IVSize == 32 2521 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2522 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2523 llvm::Type *TypeParams[] = { 2524 getIdentTyPointerTy(), // loc 2525 CGM.Int32Ty, // tid 2526 }; 2527 auto *FnTy = 2528 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2529 return CGM.CreateRuntimeFunction(FnTy, Name); 2530 } 2531 2532 llvm::FunctionCallee 2533 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2534 assert((IVSize == 32 || IVSize == 64) && 2535 "IV size is not compatible with the omp runtime"); 2536 StringRef Name = 2537 IVSize == 32 2538 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2539 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2540 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2541 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2542 llvm::Type *TypeParams[] = { 2543 getIdentTyPointerTy(), // loc 2544 CGM.Int32Ty, // tid 2545 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2546 PtrTy, // p_lower 2547 PtrTy, // p_upper 2548 PtrTy // p_stride 2549 }; 2550 auto *FnTy = 2551 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2552 return CGM.CreateRuntimeFunction(FnTy, Name); 2553 } 2554 2555 /// Obtain information that uniquely identifies a target entry. This 2556 /// consists of the file and device IDs as well as line number associated with 2557 /// the relevant entry source location. 2558 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2559 unsigned &DeviceID, unsigned &FileID, 2560 unsigned &LineNum) { 2561 SourceManager &SM = C.getSourceManager(); 2562 2563 // The loc should be always valid and have a file ID (the user cannot use 2564 // #pragma directives in macros) 2565 2566 assert(Loc.isValid() && "Source location is expected to be always valid."); 2567 2568 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2569 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2570 2571 llvm::sys::fs::UniqueID ID; 2572 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2573 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2574 << PLoc.getFilename() << EC.message(); 2575 2576 DeviceID = ID.getDevice(); 2577 FileID = ID.getFile(); 2578 LineNum = PLoc.getLine(); 2579 } 2580 2581 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2582 if (CGM.getLangOpts().OpenMPSimd) 2583 return Address::invalid(); 2584 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2585 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2586 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2587 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2588 HasRequiresUnifiedSharedMemory))) { 2589 SmallString<64> PtrName; 2590 { 2591 
llvm::raw_svector_ostream OS(PtrName); 2592 OS << CGM.getMangledName(GlobalDecl(VD)); 2593 if (!VD->isExternallyVisible()) { 2594 unsigned DeviceID, FileID, Line; 2595 getTargetEntryUniqueInfo(CGM.getContext(), 2596 VD->getCanonicalDecl()->getBeginLoc(), 2597 DeviceID, FileID, Line); 2598 OS << llvm::format("_%x", FileID); 2599 } 2600 OS << "_decl_tgt_ref_ptr"; 2601 } 2602 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2603 if (!Ptr) { 2604 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2605 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2606 PtrName); 2607 2608 auto *GV = cast<llvm::GlobalVariable>(Ptr); 2609 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2610 2611 if (!CGM.getLangOpts().OpenMPIsDevice) 2612 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2613 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2614 } 2615 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2616 } 2617 return Address::invalid(); 2618 } 2619 2620 llvm::Constant * 2621 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2622 assert(!CGM.getLangOpts().OpenMPUseTLS || 2623 !CGM.getContext().getTargetInfo().isTLSSupported()); 2624 // Lookup the entry, lazily creating it if necessary. 
2625 std::string Suffix = getName({"cache", ""}); 2626 return getOrCreateInternalVariable( 2627 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2628 } 2629 2630 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2631 const VarDecl *VD, 2632 Address VDAddr, 2633 SourceLocation Loc) { 2634 if (CGM.getLangOpts().OpenMPUseTLS && 2635 CGM.getContext().getTargetInfo().isTLSSupported()) 2636 return VDAddr; 2637 2638 llvm::Type *VarTy = VDAddr.getElementType(); 2639 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2640 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2641 CGM.Int8PtrTy), 2642 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2643 getOrCreateThreadPrivateCache(VD)}; 2644 return Address(CGF.EmitRuntimeCall( 2645 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2646 VDAddr.getAlignment()); 2647 } 2648 2649 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2650 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2651 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2652 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2653 // library. 2654 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2655 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2656 OMPLoc); 2657 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2658 // to register constructor/destructor for variable. 
2659 llvm::Value *Args[] = { 2660 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2661 Ctor, CopyCtor, Dtor}; 2662 CGF.EmitRuntimeCall( 2663 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2664 } 2665 2666 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2667 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2668 bool PerformInit, CodeGenFunction *CGF) { 2669 if (CGM.getLangOpts().OpenMPUseTLS && 2670 CGM.getContext().getTargetInfo().isTLSSupported()) 2671 return nullptr; 2672 2673 VD = VD->getDefinition(CGM.getContext()); 2674 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2675 QualType ASTTy = VD->getType(); 2676 2677 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2678 const Expr *Init = VD->getAnyInitializer(); 2679 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2680 // Generate function that re-emits the declaration's initializer into the 2681 // threadprivate copy of the variable VD 2682 CodeGenFunction CtorCGF(CGM); 2683 FunctionArgList Args; 2684 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2685 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2686 ImplicitParamDecl::Other); 2687 Args.push_back(&Dst); 2688 2689 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2690 CGM.getContext().VoidPtrTy, Args); 2691 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2692 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2693 llvm::Function *Fn = 2694 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2695 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2696 Args, Loc, Loc); 2697 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2698 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2699 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2700 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2701 Arg = CtorCGF.Builder.CreateElementBitCast( 2702 Arg, 
CtorCGF.ConvertTypeForMem(ASTTy)); 2703 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2704 /*IsInitializer=*/true); 2705 ArgVal = CtorCGF.EmitLoadOfScalar( 2706 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2707 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2708 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2709 CtorCGF.FinishFunction(); 2710 Ctor = Fn; 2711 } 2712 if (VD->getType().isDestructedType() != QualType::DK_none) { 2713 // Generate function that emits destructor call for the threadprivate copy 2714 // of the variable VD 2715 CodeGenFunction DtorCGF(CGM); 2716 FunctionArgList Args; 2717 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2718 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2719 ImplicitParamDecl::Other); 2720 Args.push_back(&Dst); 2721 2722 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2723 CGM.getContext().VoidTy, Args); 2724 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2725 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2726 llvm::Function *Fn = 2727 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2728 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2729 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2730 Loc, Loc); 2731 // Create a scope with an artificial location for the body of this function. 2732 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2733 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2734 DtorCGF.GetAddrOfLocalVar(&Dst), 2735 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2736 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2737 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2738 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2739 DtorCGF.FinishFunction(); 2740 Dtor = Fn; 2741 } 2742 // Do not emit init function if it is not required. 
2743 if (!Ctor && !Dtor) 2744 return nullptr; 2745 2746 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2747 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2748 /*isVarArg=*/false) 2749 ->getPointerTo(); 2750 // Copying constructor for the threadprivate variable. 2751 // Must be NULL - reserved by runtime, but currently it requires that this 2752 // parameter is always NULL. Otherwise it fires assertion. 2753 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2754 if (Ctor == nullptr) { 2755 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2756 /*isVarArg=*/false) 2757 ->getPointerTo(); 2758 Ctor = llvm::Constant::getNullValue(CtorTy); 2759 } 2760 if (Dtor == nullptr) { 2761 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2762 /*isVarArg=*/false) 2763 ->getPointerTo(); 2764 Dtor = llvm::Constant::getNullValue(DtorTy); 2765 } 2766 if (!CGF) { 2767 auto *InitFunctionTy = 2768 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2769 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2770 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2771 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2772 CodeGenFunction InitCGF(CGM); 2773 FunctionArgList ArgList; 2774 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2775 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2776 Loc, Loc); 2777 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2778 InitCGF.FinishFunction(); 2779 return InitFunction; 2780 } 2781 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2782 } 2783 return nullptr; 2784 } 2785 2786 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2787 llvm::GlobalVariable *Addr, 2788 bool PerformInit) { 2789 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2790 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2791 if (!Res || *Res == 
OMPDeclareTargetDeclAttr::MT_Link || 2792 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2793 HasRequiresUnifiedSharedMemory)) 2794 return CGM.getLangOpts().OpenMPIsDevice; 2795 VD = VD->getDefinition(CGM.getContext()); 2796 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2797 return CGM.getLangOpts().OpenMPIsDevice; 2798 2799 QualType ASTTy = VD->getType(); 2800 2801 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2802 // Produce the unique prefix to identify the new target regions. We use 2803 // the source location of the variable declaration which we know to not 2804 // conflict with any target region. 2805 unsigned DeviceID; 2806 unsigned FileID; 2807 unsigned Line; 2808 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2809 SmallString<128> Buffer, Out; 2810 { 2811 llvm::raw_svector_ostream OS(Buffer); 2812 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2813 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2814 } 2815 2816 const Expr *Init = VD->getAnyInitializer(); 2817 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2818 llvm::Constant *Ctor; 2819 llvm::Constant *ID; 2820 if (CGM.getLangOpts().OpenMPIsDevice) { 2821 // Generate function that re-emits the declaration's initializer into 2822 // the threadprivate copy of the variable VD 2823 CodeGenFunction CtorCGF(CGM); 2824 2825 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2826 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2827 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2828 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2829 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2830 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2831 FunctionArgList(), Loc, Loc); 2832 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2833 CtorCGF.EmitAnyExprToMem(Init, 2834 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2835 
Init->getType().getQualifiers(), 2836 /*IsInitializer=*/true); 2837 CtorCGF.FinishFunction(); 2838 Ctor = Fn; 2839 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2840 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2841 } else { 2842 Ctor = new llvm::GlobalVariable( 2843 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2844 llvm::GlobalValue::PrivateLinkage, 2845 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2846 ID = Ctor; 2847 } 2848 2849 // Register the information for the entry associated with the constructor. 2850 Out.clear(); 2851 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2852 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2853 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2854 } 2855 if (VD->getType().isDestructedType() != QualType::DK_none) { 2856 llvm::Constant *Dtor; 2857 llvm::Constant *ID; 2858 if (CGM.getLangOpts().OpenMPIsDevice) { 2859 // Generate function that emits destructor call for the threadprivate 2860 // copy of the variable VD 2861 CodeGenFunction DtorCGF(CGM); 2862 2863 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2864 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2865 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2866 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2867 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2868 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2869 FunctionArgList(), Loc, Loc); 2870 // Create a scope with an artificial location for the body of this 2871 // function. 
2872 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2873 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2874 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2875 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2876 DtorCGF.FinishFunction(); 2877 Dtor = Fn; 2878 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2879 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2880 } else { 2881 Dtor = new llvm::GlobalVariable( 2882 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2883 llvm::GlobalValue::PrivateLinkage, 2884 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2885 ID = Dtor; 2886 } 2887 // Register the information for the entry associated with the destructor. 2888 Out.clear(); 2889 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2890 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2891 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2892 } 2893 return CGM.getLangOpts().OpenMPIsDevice; 2894 } 2895 2896 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2897 QualType VarType, 2898 StringRef Name) { 2899 std::string Suffix = getName({"artificial", ""}); 2900 std::string CacheSuffix = getName({"cache", ""}); 2901 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2902 llvm::Value *GAddr = 2903 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2904 llvm::Value *Args[] = { 2905 emitUpdateLocation(CGF, SourceLocation()), 2906 getThreadID(CGF, SourceLocation()), 2907 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2908 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2909 /*isSigned=*/false), 2910 getOrCreateInternalVariable( 2911 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2912 return Address( 2913 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2914 CGF.EmitRuntimeCall( 2915 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2916 
VarLVType->getPointerTo(/*AddrSpace=*/0)), 2917 CGM.getPointerAlign()); 2918 } 2919 2920 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2921 const RegionCodeGenTy &ThenGen, 2922 const RegionCodeGenTy &ElseGen) { 2923 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2924 2925 // If the condition constant folds and can be elided, try to avoid emitting 2926 // the condition and the dead arm of the if/else. 2927 bool CondConstant; 2928 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2929 if (CondConstant) 2930 ThenGen(CGF); 2931 else 2932 ElseGen(CGF); 2933 return; 2934 } 2935 2936 // Otherwise, the condition did not fold, or we couldn't elide it. Just 2937 // emit the conditional branch. 2938 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2939 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2940 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2941 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2942 2943 // Emit the 'then' code. 2944 CGF.EmitBlock(ThenBlock); 2945 ThenGen(CGF); 2946 CGF.EmitBranch(ContBlock); 2947 // Emit the 'else' code if present. 2948 // There is no need to emit line number for unconditional branch. 2949 (void)ApplyDebugLocation::CreateEmpty(CGF); 2950 CGF.EmitBlock(ElseBlock); 2951 ElseGen(CGF); 2952 // There is no need to emit line number for unconditional branch. 2953 (void)ApplyDebugLocation::CreateEmpty(CGF); 2954 CGF.EmitBranch(ContBlock); 2955 // Emit the continuation block for code after the if. 
2956 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2957 } 2958 2959 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2960 llvm::Function *OutlinedFn, 2961 ArrayRef<llvm::Value *> CapturedVars, 2962 const Expr *IfCond) { 2963 if (!CGF.HaveInsertPoint()) 2964 return; 2965 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2966 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 2967 PrePostActionTy &) { 2968 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2969 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2970 llvm::Value *Args[] = { 2971 RTLoc, 2972 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 2973 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 2974 llvm::SmallVector<llvm::Value *, 16> RealArgs; 2975 RealArgs.append(std::begin(Args), std::end(Args)); 2976 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 2977 2978 llvm::FunctionCallee RTLFn = 2979 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 2980 CGF.EmitRuntimeCall(RTLFn, RealArgs); 2981 }; 2982 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 2983 PrePostActionTy &) { 2984 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2985 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 2986 // Build calls: 2987 // __kmpc_serialized_parallel(&Loc, GTid); 2988 llvm::Value *Args[] = {RTLoc, ThreadID}; 2989 CGF.EmitRuntimeCall( 2990 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 2991 2992 // OutlinedFn(>id, &zero, CapturedStruct); 2993 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 2994 /*Name*/ ".zero.addr"); 2995 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 2996 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 2997 // ThreadId for serialized parallels is 0. 
2998 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 2999 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 3000 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3001 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3002 3003 // __kmpc_end_serialized_parallel(&Loc, GTid); 3004 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3005 CGF.EmitRuntimeCall( 3006 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3007 EndArgs); 3008 }; 3009 if (IfCond) { 3010 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 3011 } else { 3012 RegionCodeGenTy ThenRCG(ThenGen); 3013 ThenRCG(CGF); 3014 } 3015 } 3016 3017 // If we're inside an (outlined) parallel region, use the region info's 3018 // thread-ID variable (it is passed in a first argument of the outlined function 3019 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3020 // regular serial code region, get thread ID by calling kmp_int32 3021 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3022 // return the address of that temp. 
3023 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3024 SourceLocation Loc) { 3025 if (auto *OMPRegionInfo = 3026 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3027 if (OMPRegionInfo->getThreadIDVariable()) 3028 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 3029 3030 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3031 QualType Int32Ty = 3032 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3033 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3034 CGF.EmitStoreOfScalar(ThreadID, 3035 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3036 3037 return ThreadIDTemp; 3038 } 3039 3040 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3041 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3042 SmallString<256> Buffer; 3043 llvm::raw_svector_ostream Out(Buffer); 3044 Out << Name; 3045 StringRef RuntimeName = Out.str(); 3046 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3047 if (Elem.second) { 3048 assert(Elem.second->getType()->getPointerElementType() == Ty && 3049 "OMP internal variable has different type than requested"); 3050 return &*Elem.second; 3051 } 3052 3053 return Elem.second = new llvm::GlobalVariable( 3054 CGM.getModule(), Ty, /*IsConstant*/ false, 3055 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3056 Elem.first(), /*InsertBefore=*/nullptr, 3057 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3058 } 3059 3060 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3061 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3062 std::string Name = getName({Prefix, "var"}); 3063 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3064 } 3065 3066 namespace { 3067 /// Common pre(post)-action for different OpenMP constructs. 
/// Common pre/post action for OpenMP regions that are bracketed by a pair of
/// runtime calls: an "enter" call emitted before the region body and an
/// "exit" call emitted after it. If \a Conditional is true, the region body
/// is emitted under a branch on the (non-zero) result of the enter call —
/// this is how constructs like 'master' and 'single' restrict the body to a
/// single thread.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  /// Close the conditional region opened in Enter(). Only meaningful when the
  /// action was constructed with Conditional == true; callers invoke it after
  /// the region body has been emitted.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // The hint is only passed to the enter call; __kmpc_end_critical takes
    // the plain (loc, gtid, lock) argument list.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  // Emit the continuation block of the conditional 'if (__kmpc_master(...))'.
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
  // A taskyield is a scheduling point for untied tasks.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Emit the helper function that copies 'copyprivate' variables between
/// threads. The generated function has signature void(void *LHSArg,
/// void *RHSArg), where both arguments point to arrays of void* addressing
/// the destination and source variables, respectively, and performs one
/// EmitOMPCopy per copyprivate variable.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  (stored inside the conditional region, i.e. only by the
    // thread that actually executed the single region)
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy =
        C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                               /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  // No 'threads' clause: emit the region without the runtime bracketing.
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the ident_t barrier flag that tells the runtime
/// which construct an (implicit or explicit) barrier belongs to.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Inside a cancellable region use the cancelling barrier so a pending
    // cancellation can be observed at the barrier.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Fold the monotonic/nonmonotonic/simd schedule modifiers into the integer
/// schedule value passed to the runtime. If both M1 and M2 carry a
/// monotonicity modifier, M2's assignment wins (it is processed second); the
/// 'simd' modifier upgrades a static chunked schedule to the balanced
/// chunked variant.
static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Static non-ordered schedules use __kmpc_for_static_init instead.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                // Lower
      DispatchValues.UB,                                // Upper
      CGF.Builder.getIntN(IVSize, 1),                   // Stride
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper for emitForStaticInit/emitDistributeStaticInit: emits the
/// __kmpc_for_static_init call with the given (already mapped) schedule.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  // 'distribute' has no schedule modifiers.
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns a kmp_int32; convert to bool ("more work left").
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    // On the host, create the entry from scratch.
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Entry already has an address: only fill in a size/linkage that was
      // previously unknown.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
3929 llvm::Type *OffloadEntryTy = 3930 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3931 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); 3932 auto *HostEntriesBegin = new llvm::GlobalVariable( 3933 M, OffloadEntryTy, /*isConstant=*/true, 3934 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3935 EntriesBeginName); 3936 std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); 3937 auto *HostEntriesEnd = 3938 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, 3939 llvm::GlobalValue::ExternalLinkage, 3940 /*Initializer=*/nullptr, EntriesEndName); 3941 3942 // Create all device images 3943 auto *DeviceImageTy = cast<llvm::StructType>( 3944 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3945 ConstantInitBuilder DeviceImagesBuilder(CGM); 3946 ConstantArrayBuilder DeviceImagesEntries = 3947 DeviceImagesBuilder.beginArray(DeviceImageTy); 3948 3949 for (const llvm::Triple &Device : Devices) { 3950 StringRef T = Device.getTriple(); 3951 std::string BeginName = getName({"omp_offloading", "img_start", ""}); 3952 auto *ImgBegin = new llvm::GlobalVariable( 3953 M, CGM.Int8Ty, /*isConstant=*/true, 3954 llvm::GlobalValue::ExternalWeakLinkage, 3955 /*Initializer=*/nullptr, Twine(BeginName).concat(T)); 3956 std::string EndName = getName({"omp_offloading", "img_end", ""}); 3957 auto *ImgEnd = new llvm::GlobalVariable( 3958 M, CGM.Int8Ty, /*isConstant=*/true, 3959 llvm::GlobalValue::ExternalWeakLinkage, 3960 /*Initializer=*/nullptr, Twine(EndName).concat(T)); 3961 3962 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 3963 HostEntriesEnd}; 3964 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 3965 DeviceImagesEntries); 3966 } 3967 3968 // Create device images global array. 
3969 std::string ImagesName = getName({"omp_offloading", "device_images"}); 3970 llvm::GlobalVariable *DeviceImages = 3971 DeviceImagesEntries.finishAndCreateGlobal(ImagesName, 3972 CGM.getPointerAlign(), 3973 /*isConstant=*/true); 3974 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 3975 3976 // This is a Zero array to be used in the creation of the constant expressions 3977 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 3978 llvm::Constant::getNullValue(CGM.Int32Ty)}; 3979 3980 // Create the target region descriptor. 3981 llvm::Constant *Data[] = { 3982 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 3983 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 3984 DeviceImages, Index), 3985 HostEntriesBegin, HostEntriesEnd}; 3986 std::string Descriptor = getName({"omp_offloading", "descriptor"}); 3987 llvm::GlobalVariable *Desc = createGlobalStruct( 3988 CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); 3989 3990 // Emit code to register or unregister the descriptor at execution 3991 // startup or closing, respectively. 3992 3993 llvm::Function *UnRegFn; 3994 { 3995 FunctionArgList Args; 3996 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 3997 Args.push_back(&DummyPtr); 3998 3999 CodeGenFunction CGF(CGM); 4000 // Disable debug info for global (de-)initializer because they are not part 4001 // of some particular construct. 
4002 CGF.disableDebugInfo(); 4003 const auto &FI = 4004 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4005 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 4006 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); 4007 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); 4008 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 4009 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 4010 Desc); 4011 CGF.FinishFunction(); 4012 } 4013 llvm::Function *RegFn; 4014 { 4015 CodeGenFunction CGF(CGM); 4016 // Disable debug info for global (de-)initializer because they are not part 4017 // of some particular construct. 4018 CGF.disableDebugInfo(); 4019 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 4020 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 4021 4022 // Encode offload target triples into the registration function name. It 4023 // will serve as a comdat key for the registration/unregistration code for 4024 // this particular combination of offloading targets. 4025 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); 4026 RegFnNameParts[0] = "omp_offloading"; 4027 RegFnNameParts[1] = "descriptor_reg"; 4028 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), 4029 [](const llvm::Triple &T) -> const std::string& { 4030 return T.getTriple(); 4031 }); 4032 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); 4033 std::string Descriptor = getName(RegFnNameParts); 4034 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); 4035 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 4036 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 4037 // Create a variable to drive the registration and unregistration of the 4038 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
4039 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 4040 SourceLocation(), nullptr, C.CharTy, 4041 ImplicitParamDecl::Other); 4042 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 4043 CGF.FinishFunction(); 4044 } 4045 if (CGM.supportsCOMDAT()) { 4046 // It is sufficient to call registration function only once, so create a 4047 // COMDAT group for registration/unregistration functions and associated 4048 // data. That would reduce startup time and code size. Registration 4049 // function serves as a COMDAT group key. 4050 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 4051 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 4052 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 4053 RegFn->setComdat(ComdatKey); 4054 UnRegFn->setComdat(ComdatKey); 4055 DeviceImages->setComdat(ComdatKey); 4056 Desc->setComdat(ComdatKey); 4057 } 4058 return RegFn; 4059 } 4060 4061 void CGOpenMPRuntime::createOffloadEntry( 4062 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4063 llvm::GlobalValue::LinkageTypes Linkage) { 4064 StringRef Name = Addr->getName(); 4065 llvm::Module &M = CGM.getModule(); 4066 llvm::LLVMContext &C = M.getContext(); 4067 4068 // Create constant string with the name. 
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Field order must match getTgtOffloadEntryQTy():
  // {addr, name, size, flags, reserved}.
  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
  std::string Section = getName({"omp_offloading", "entries"});
  Entry->setSection(Section);
}

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // Emit the offloading entries and metadata so that the device codegen side
  // can easily figure out what to emit. The produced metadata looks like
  // this:
  //
  // !omp_offload.info = !{!1, ...}
  //
  // Right now we only generate metadata for functions that contain target
  // regions.

  // If we do not have entries, we don't need to do anything.
  if (OffloadEntriesInfoManager.empty())
    return;

  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();
  // Entries are later processed in creation order; both arrays are indexed by
  // each entry's order number.
  SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16>
      OrderedEntries(OffloadEntriesInfoManager.size());
  llvm::SmallVector<StringRef, 16> ParentFunctions(
      OffloadEntriesInfoManager.size());

  // Auxiliary methods to create metadata values and strings.
  auto &&GetMDInt = [this](unsigned V) {
    return llvm::ConstantAsMetadata::get(
        llvm::ConstantInt::get(CGM.Int32Ty, V));
  };

  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };

  // Create the offloading info metadata node.
  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");

  // Create function that emits metadata for each target region entry.
  auto &&TargetRegionMetadataEmitter =
      [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
          unsigned DeviceID, unsigned FileID, StringRef ParentName,
          unsigned Line,
          const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
        // Generate metadata for target regions. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (0).
        // - Entry 1 -> Device ID of the file where the entry was identified.
        // - Entry 2 -> File ID of the file where the entry was identified.
        // - Entry 3 -> Mangled name of the function where the entry was
        // identified.
        // - Entry 4 -> Line in the file where the entry was identified.
        // - Entry 5 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
                                 GetMDInt(FileID), GetMDString(ParentName),
                                 GetMDInt(Line), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;
        ParentFunctions[E.getOrder()] = ParentName;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
      TargetRegionMetadataEmitter);

  // Create function that emits metadata for each device global variable entry.
  auto &&DeviceGlobalVarMetadataEmitter =
      [&C, &OrderedEntries, &GetMDInt, &GetMDString,
       MD](StringRef MangledName,
           const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
               &E) {
        // Generate metadata for global variables. Each entry of this metadata
        // contains:
        // - Entry 0 -> Kind of this type of metadata (1).
        // - Entry 1 -> Mangled name of the variable.
        // - Entry 2 -> Declare target kind.
        // - Entry 3 -> Order the entry was created.
        // The first element of the metadata node is the kind.
        llvm::Metadata *Ops[] = {
            GetMDInt(E.getKind()), GetMDString(MangledName),
            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};

        // Save this entry in the right position of the ordered entries array.
        OrderedEntries[E.getOrder()] = &E;

        // Add metadata to the named metadata node.
        MD->addOperand(llvm::MDNode::get(C, Ops));
      };

  OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
      DeviceGlobalVarMetadataEmitter);

  // Walk all entries in creation order and emit a __tgt_offload_entry for
  // each valid one, diagnosing entries whose address/ID was never filled in.
  for (const auto *E : OrderedEntries) {
    assert(E && "All ordered entries must exist!");
    if (const auto *CE =
            dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
                E)) {
      if (!CE->getID() || !CE->getAddress()) {
        // Do not blame the entry if the parent function is not emitted.
        StringRef FnName = ParentFunctions[CE->getOrder()];
        if (!CGM.GetGlobalValue(FnName))
          continue;
        unsigned DiagID = CGM.getDiags().getCustomDiagID(
            DiagnosticsEngine::Error,
            "Offloading entry for target region is incorrect: either the "
            "address or the ID is invalid.");
        CGM.getDiags().Report(DiagID);
        continue;
      }
      createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
                         CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
    } else if (const auto *CE =
                   dyn_cast<OffloadEntriesInfoManagerTy::
                                OffloadEntryInfoDeviceGlobalVar>(E)) {
      OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              CE->getFlags());
      switch (Flags) {
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
        // Under unified shared memory on the device side no entry is needed.
        if (CGM.getLangOpts().OpenMPIsDevice &&
            CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        // The variable has no definition - no need to add the entry.
        if (CE->getVarSize().isZero())
          continue;
        break;
      }
      case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
        // 'link' entries have an address only on the host side.
        assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
                (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
               "Declaret target link address is set.");
        if (CGM.getLangOpts().OpenMPIsDevice)
          continue;
        if (!CE->getAddress()) {
          unsigned DiagID = CGM.getDiags().getCustomDiagID(
              DiagnosticsEngine::Error,
              "Offloading entry for declare target variable is incorrect: the "
              "address is invalid.");
          CGM.getDiags().Report(DiagID);
          continue;
        }
        break;
      }
      // For global variables the entry's ID is the variable address itself.
      createOffloadEntry(CE->getAddress(), CE->getAddress(),
                         CE->getVarSize().getQuantity(), Flags,
                         CE->getLinkage());
    } else {
      llvm_unreachable("Unsupported entry kind.");
    }
  }
}

/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code has
  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context just to read its metadata.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Operand layouts must mirror createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily build and cache the kmp_routine_entry_t type:
/// kmp_int32 (*)(kmp_int32, void *), in both its AST (KmpRoutineEntryPtrQTy)
/// and LLVM (KmpRoutineEntryPtrTy) forms.
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
  // };
  if (TgtOffloadEntryQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
    addFieldToRecordDecl(C, RD, C.getSizeType());
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    RD->completeDefinition();
    // Packed so the layout matches the runtime's expectation exactly.
    RD->addAttr(PackedAttr::CreateImplicit(C));
    TgtOffloadEntryQTy = C.getRecordType(RD);
  }
  return TgtOffloadEntryQTy;
}

QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
  // These are the types we need to build:
  // struct __tgt_device_image{
  //   void   *ImageStart;       // Pointer to the target code start.
  //   void   *ImageEnd;         // Pointer to the target code end.
  //   // We also add the host entries to the device image, as it may be useful
  //   // for the target runtime to have access to that information.
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all
  //                                         // the entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtDeviceImageQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtDeviceImageQTy = C.getRecordType(RD);
  }
  return TgtDeviceImageQTy;
}

QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
  // struct __tgt_bin_desc{
  //   int32_t              NumDevices;      // Number of devices supported.
  //   __tgt_device_image   *DeviceImages;   // Arrays of device images
  //                                         // (one per device).
  //   __tgt_offload_entry  *EntriesBegin;   // Begin of the table with all the
  //                                         // entries.
  //   __tgt_offload_entry  *EntriesEnd;     // End of the table with all the
  //                                         // entries (non inclusive).
  // };
  if (TgtBinaryDescriptorQTy.isNull()) {
    ASTContext &C = CGM.getContext();
    RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
    RD->startDefinition();
    addFieldToRecordDecl(
        C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy()));
    RD->completeDefinition();
    TgtBinaryDescriptorQTy = C.getRecordType(RD);
  }
  return TgtBinaryDescriptorQTy;
}

namespace {
/// Bundles the three declarations involved in privatizing one variable in a
/// task-based construct.
struct PrivateHelpersTy {
  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
                   const VarDecl *PrivateElemInit)
      : Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  const VarDecl *Original;        // The variable being privatized.
  const VarDecl *PrivateCopy;     // The private copy declaration.
  const VarDecl *PrivateElemInit; // Init helper (e.g. for firstprivate).
};
// Alignment is paired with the helpers so privates can be laid out by
// decreasing alignment.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

/// Build an implicit record holding one field per privatized variable, or
/// return nullptr when there are no privates. Alignment attributes from the
/// original variables are propagated to the fields.
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t.
    //       {
    //         /* private vars */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      // Store the value itself, not a reference, for reference-typed privates.
      QualType Type = VD->getType().getNonReferenceType();
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        // Propagate 'aligned' attributes so the field keeps the variable's
        // required alignment.
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

/// Build the implicit kmp_task_t record (with the extra taskloop fields when
/// \p Kind is a taskloop directive). Field order must stay in sync with the
/// KmpTaskT* field-index enumerators used by the emitters below.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

/// Wrap kmp_task_t together with the privates record (when present, it is
/// the second field) into a single implicit record.
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Signature: kmp_int32 (kmp_int32 gtid, kmp_task_t_with_privates *restrict).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // First field of kmp_task_t_with_privates is the embedded kmp_task_t.
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  // part_id is passed by address, not by value.
  llvm::Value *PartidParam = PartIdLVal.getPointer();

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates field exists only when the task has privatized variables;
  // otherwise pass a null pointer.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Load the taskloop bounds/stride/lastiter/reductions fields and append
    // them to the outlined function's argument list.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The runtime expects the proxy to return 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emit the task destructor thunk: walks the privates record of
/// kmp_task_t_with_privates and runs the destructor for every field whose
/// type needs destruction.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Push a destroy only for field types that actually need destruction.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
///                              **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // First parameter: const .privates. *restrict.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Map each privatized VarDecl to its (1-based) parameter position; the
  // output parameters are appended in private/firstprivate/lastprivate order.
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // This trivial mapping function should always be inlined when optimizing.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    // Look up the output parameter for this field via the position map built
    // above (fields and Privates are in the same order).
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the shareds block of the source task;
///        may be Address::invalid() when there is nothing to copy from.
/// \param TDBase Base LValue of the kmp_task_t_with_privates record being
///        initialized.
/// \param ForDup true when called from the task duplication function (only
///        non-trivial construction needs to be re-run there), false for the
///        initial task creation.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of kmp_task_t_with_privates is the .privates. record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the .privates. record in lockstep with Privates.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating a task, only non-trivial constructor calls have to be
    // re-executed; trivially initialized privates were already copied.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit marks a firstprivate: initialize from the
      // shared original rather than default-initializing.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          // Re-wrap with the original declaration's alignment; the field
          // lvalue's alignment reflects the shareds record layout instead.
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record firstprivate: evaluate Init with Elem temporarily
          // bound to the shared original's address.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private (or lastprivate) copy: run its default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  // A duplication function is needed as soon as any private copy has a
  // non-trivial constructor call as its initializer.
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: (kmp_task_t_with_privates *dst, kmp_task_t_with_privates *src,
  //             int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task's shareds block, so the
    // base is derived from SrcArg here (this local TDBase deliberately
    // shadows the destination one above).
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  // The second field of kmp_task_t_with_privates is the .privates. record;
  // cleanups are needed iff any of its fields has a non-trivial destructor.
  bool NeedsCleanup = false;
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Set up everything a task-based directive needs before the runtime call:
/// builds the kmp_task_t_with_privates type, allocates the task with
/// __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc), copies shareds,
/// initializes private copies, and records destructor/priority data.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment; stable so equal-alignment entries keep
  // their private/firstprivate/lastprivate order.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloops use a wider record
  // (bounds/stride fields), so the two variants are cached separately.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th argument of the outlined task
  // function; reuse its LLVM type for the cast/null below.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // final(expr) may be a runtime value: select the flag dynamically when a
  // condition value is present, otherwise fold the constant.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops that re-run construction or handle lastprivates need a
    // duplication function the runtime calls when it splits the task.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    // Flag values must match the kmp_depend_info flags in the libomp runtime.
    enum RTLDependenceKindTy {
      DepIn = 0x01,
      DepInOut = 0x3,
      DepMutexInOutSet = 0x4
    };
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // Build (or reuse the cached) implicit record type kmp_depend_info
    // { intptr_t base_addr; size_t len; flags; }.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: length = (&last_element + 1) - &first_element,
        // computed in bytes via ptrtoint.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a void* pointing at its first element for the
    // runtime calls below.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' branch of the if clause (or the unconditional path): enqueue the
  // task asynchronously via __kmpc_omp_task[_with_deps].
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'else' branch (if clause evaluated to false): execute the task body
  // immediately ("undeferred"), bracketed by begin_if0/complete_if0 calls.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data)
{
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: evaluated 'if' clause condition, or constant 1 when absent.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb/ub/st fields of the kmp_task_t record from the loop
  // directive's bound/stride variables; the runtime reads them to split the
  // iteration space.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling mode for __kmpc_taskloop's 'sched' parameter.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  // Only the LHS end pointer is computed: both arrays have the same element
  // count, and the loop below terminates on the LHS bound alone.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers; the incoming
  // value from the loop back-edge is registered after the body is emitted.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current element
  // addresses so RedOpGen (which references the whole variables) operates on
  // a single element per iteration.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // Back-edge incoming values use the *current* insert block: RedOpGen may
  // have emitted additional blocks, so EntryBB would be wrong here.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner.
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // A UDR combiner is modeled as a CallExpr whose callee is an
  // OpaqueValueExpr wrapping a reference to the OMPDeclareReductionDecl;
  // remap that opaque value to the actual combiner function and emit the call.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Not a UDR call: emit the reduction expression as-is.
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Remap each LHS/RHS variable to the corresponding slot of the incoming
  // void* arrays. Idx tracks the array slot and can run ahead of I when a
  // VLA's size occupies an extra slot.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type. The size was passed in the next
      // RedList slot as a pointer-encoded integer (see emitReduction).
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: emit the combiners inline.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size in the following extra slot, encoded as a pointer;
      // emitReductionFunction decodes it with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The CommonActionTy's exit call emits __kmpc_end_reduce{_nowait} after the
  // combiners, even on the exceptional path.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Reduction ops of the form 'x = <update>' are candidates for a simple
      // atomic update; anything else falls back to a critical region below.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Remap the LHS variable to a temporary holding the loaded
                // atomic value so UpExpr computes the new value from it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 6006 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 6007 const Expr *Ref) { 6008 SmallString<256> Buffer; 6009 llvm::raw_svector_ostream Out(Buffer); 6010 const clang::DeclRefExpr *DE; 6011 const VarDecl *D = ::getBaseDecl(Ref, DE); 6012 if (!D) 6013 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 6014 D = D->getCanonicalDecl(); 6015 std::string Name = CGM.getOpenMPRuntime().getName( 6016 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 6017 Out << Prefix << Name << "_" 6018 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 6019 return Out.str(); 6020 } 6021 6022 /// Emits reduction initializer function: 6023 /// \code 6024 /// void @.red_init(void* %arg) { 6025 /// %0 = bitcast void* %arg to <type>* 6026 /// store <type> <init>, <type>* %0 6027 /// ret void 6028 /// } 6029 /// \endcode 6030 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6031 SourceLocation Loc, 6032 ReductionCodeGen &RCG, unsigned N) { 6033 ASTContext &C = CGM.getContext(); 6034 FunctionArgList Args; 6035 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6036 ImplicitParamDecl::Other); 6037 Args.emplace_back(&Param); 6038 const auto &FnInfo = 6039 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6040 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6041 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6042 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6043 Name, &CGM.getModule()); 6044 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6045 Fn->setDoesNotRecurse(); 6046 CodeGenFunction CGF(CGM); 6047 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6048 Address PrivateAddr = CGF.EmitLoadOfPointer( 6049 CGF.GetAddrOfLocalVar(&Param), 6050 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6051 llvm::Value *Size = nullptr; 6052 
// If the size of the reduction item is non-constant, load it from global 6053 // threadprivate variable. 6054 if (RCG.getSizes(N).second) { 6055 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6056 CGF, CGM.getContext().getSizeType(), 6057 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6058 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6059 CGM.getContext().getSizeType(), Loc); 6060 } 6061 RCG.emitAggregateType(CGF, N, Size); 6062 LValue SharedLVal; 6063 // If initializer uses initializer from declare reduction construct, emit a 6064 // pointer to the address of the original reduction item (reuired by reduction 6065 // initializer) 6066 if (RCG.usesReductionInitializer(N)) { 6067 Address SharedAddr = 6068 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6069 CGF, CGM.getContext().VoidPtrTy, 6070 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6071 SharedAddr = CGF.EmitLoadOfPointer( 6072 SharedAddr, 6073 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 6074 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 6075 } else { 6076 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 6077 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 6078 CGM.getContext().VoidPtrTy); 6079 } 6080 // Emit the initializer: 6081 // %0 = bitcast void* %arg to <type>* 6082 // store <type> <init>, <type>* %0 6083 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 6084 [](CodeGenFunction &) { return false; }); 6085 CGF.FinishFunction(); 6086 return Fn; 6087 } 6088 6089 /// Emits reduction combiner function: 6090 /// \code 6091 /// void @.red_comb(void* %arg0, void* %arg1) { 6092 /// %lhs = bitcast void* %arg0 to <type>* 6093 /// %rhs = bitcast void* %arg1 to <type>* 6094 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6095 /// store <type> %2, <type>* %lhs 6096 /// ret void 6097 /// } 6098 /// \endcode 6099 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 6100 
SourceLocation Loc, 6101 ReductionCodeGen &RCG, unsigned N, 6102 const Expr *ReductionOp, 6103 const Expr *LHS, const Expr *RHS, 6104 const Expr *PrivateRef) { 6105 ASTContext &C = CGM.getContext(); 6106 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6107 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6108 FunctionArgList Args; 6109 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6110 C.VoidPtrTy, ImplicitParamDecl::Other); 6111 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6112 ImplicitParamDecl::Other); 6113 Args.emplace_back(&ParamInOut); 6114 Args.emplace_back(&ParamIn); 6115 const auto &FnInfo = 6116 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6117 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6118 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6119 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6120 Name, &CGM.getModule()); 6121 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6122 Fn->setDoesNotRecurse(); 6123 CodeGenFunction CGF(CGM); 6124 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6125 llvm::Value *Size = nullptr; 6126 // If the size of the reduction item is non-constant, load it from global 6127 // threadprivate variable. 6128 if (RCG.getSizes(N).second) { 6129 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6130 CGF, CGM.getContext().getSizeType(), 6131 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6132 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6133 CGM.getContext().getSizeType(), Loc); 6134 } 6135 RCG.emitAggregateType(CGF, N, Size); 6136 // Remap lhs and rhs variables to the addresses of the function arguments. 
6137 // %lhs = bitcast void* %arg0 to <type>* 6138 // %rhs = bitcast void* %arg1 to <type>* 6139 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6140 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6141 // Pull out the pointer to the variable. 6142 Address PtrAddr = CGF.EmitLoadOfPointer( 6143 CGF.GetAddrOfLocalVar(&ParamInOut), 6144 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6145 return CGF.Builder.CreateElementBitCast( 6146 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6147 }); 6148 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6149 // Pull out the pointer to the variable. 6150 Address PtrAddr = CGF.EmitLoadOfPointer( 6151 CGF.GetAddrOfLocalVar(&ParamIn), 6152 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6153 return CGF.Builder.CreateElementBitCast( 6154 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6155 }); 6156 PrivateScope.Privatize(); 6157 // Emit the combiner body: 6158 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6159 // store <type> %2, <type>* %lhs 6160 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6161 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6162 cast<DeclRefExpr>(RHS)); 6163 CGF.FinishFunction(); 6164 return Fn; 6165 } 6166 6167 /// Emits reduction finalizer function: 6168 /// \code 6169 /// void @.red_fini(void* %arg) { 6170 /// %0 = bitcast void* %arg to <type>* 6171 /// <destroy>(<type>* %0) 6172 /// ret void 6173 /// } 6174 /// \endcode 6175 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6176 SourceLocation Loc, 6177 ReductionCodeGen &RCG, unsigned N) { 6178 if (!RCG.needCleanups(N)) 6179 return nullptr; 6180 ASTContext &C = CGM.getContext(); 6181 FunctionArgList Args; 6182 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6183 ImplicitParamDecl::Other); 6184 Args.emplace_back(&Param); 6185 const auto &FnInfo = 6186 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6187 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 6188 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6189 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6190 Name, &CGM.getModule()); 6191 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6192 Fn->setDoesNotRecurse(); 6193 CodeGenFunction CGF(CGM); 6194 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6195 Address PrivateAddr = CGF.EmitLoadOfPointer( 6196 CGF.GetAddrOfLocalVar(&Param), 6197 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6198 llvm::Value *Size = nullptr; 6199 // If the size of the reduction item is non-constant, load it from global 6200 // threadprivate variable. 6201 if (RCG.getSizes(N).second) { 6202 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6203 CGF, CGM.getContext().getSizeType(), 6204 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6205 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6206 CGM.getContext().getSizeType(), Loc); 6207 } 6208 RCG.emitAggregateType(CGF, N, Size); 6209 // Emit the finalizer body: 6210 // <destroy>(<type>* %0) 6211 RCG.emitCleanups(CGF, N, PrivateAddr); 6212 CGF.FinishFunction(); 6213 return Fn; 6214 } 6215 6216 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6217 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6218 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6219 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6220 return nullptr; 6221 6222 // Build typedef struct: 6223 // kmp_task_red_input { 6224 // void *reduce_shar; // shared reduction item 6225 // size_t reduce_size; // size of data item 6226 // void *reduce_init; // data initialization routine 6227 // void *reduce_fini; // data finalization routine 6228 // void *reduce_comb; // data combiner routine 6229 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6230 // } kmp_task_red_input_t; 6231 
ASTContext &C = CGM.getContext(); 6232 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6233 RD->startDefinition(); 6234 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6235 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6236 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6237 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6238 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6239 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6240 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6241 RD->completeDefinition(); 6242 QualType RDType = C.getRecordType(RD); 6243 unsigned Size = Data.ReductionVars.size(); 6244 llvm::APInt ArraySize(/*numBits=*/64, Size); 6245 QualType ArrayRDType = C.getConstantArrayType( 6246 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 6247 // kmp_task_red_input_t .rd_input.[Size]; 6248 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6249 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6250 Data.ReductionOps); 6251 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6252 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6253 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6254 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6255 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6256 TaskRedInput.getPointer(), Idxs, 6257 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6258 ".rd_input.gep."); 6259 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6260 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6261 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6262 RCG.emitSharedLValue(CGF, Cnt); 6263 llvm::Value *CastedShared = 6264 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 6265 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6266 RCG.emitAggregateType(CGF, Cnt); 6267 llvm::Value *SizeValInChars; 6268 llvm::Value 
*SizeVal; 6269 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6270 // We use delayed creation/initialization for VLAs, array sections and 6271 // custom reduction initializations. It is required because runtime does not 6272 // provide the way to pass the sizes of VLAs/array sections to 6273 // initializer/combiner/finalizer functions and does not pass the pointer to 6274 // original reduction item to the initializer. Instead threadprivate global 6275 // variables are used to store these values and use them in the functions. 6276 bool DelayedCreation = !!SizeVal; 6277 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6278 /*isSigned=*/false); 6279 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6280 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6281 // ElemLVal.reduce_init = init; 6282 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6283 llvm::Value *InitAddr = 6284 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6285 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6286 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 6287 // ElemLVal.reduce_fini = fini; 6288 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6289 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6290 llvm::Value *FiniAddr = Fini 6291 ? 
CGF.EmitCastToVoidPtr(Fini) 6292 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6293 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6294 // ElemLVal.reduce_comb = comb; 6295 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6296 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6297 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6298 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6299 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6300 // ElemLVal.flags = 0; 6301 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6302 if (DelayedCreation) { 6303 CGF.EmitStoreOfScalar( 6304 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6305 FlagsLVal); 6306 } else 6307 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); 6308 } 6309 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 6310 // *data); 6311 llvm::Value *Args[] = { 6312 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6313 /*isSigned=*/true), 6314 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6315 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6316 CGM.VoidPtrTy)}; 6317 return CGF.EmitRuntimeCall( 6318 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 6319 } 6320 6321 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6322 SourceLocation Loc, 6323 ReductionCodeGen &RCG, 6324 unsigned N) { 6325 auto Sizes = RCG.getSizes(N); 6326 // Emit threadprivate global variable if the type is non-constant 6327 // (Sizes.second = nullptr). 
  // Non-constant size (Sizes.second != nullptr): the runtime cannot pass the
  // size to the reduction init/combiner/finalizer callbacks, so stash it in an
  // artificial threadprivate variable keyed by the reduction item's ref expr.
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Ask the runtime for the thread-specific copy of a task reduction item.
/// Emits a call to __kmpc_task_reduction_get_th_data(gtid, tg, d) where \p
/// ReductionsPtr is the taskgroup descriptor and \p SharedLVal the original
/// shared item; the returned pointer is wrapped with the shared item's
/// alignment.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emit code for the 'taskwait' directive: a call to __kmpc_omp_taskwait,
/// followed by the enclosing region's untied-task switch (if any).
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  // An untied task may be rescheduled at this point; let the enclosing region
  // emit its resume switch.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit an OpenMP region that needs no outlined function: the body is emitted
/// inline under an InlinedOpenMPRegionRAII that temporarily swaps the
/// captured-statement info.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kinds passed as the kmp_int32 'cncl_kind' argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Map the construct named in a 'cancel'/'cancellation point' directive to
/// the runtime's cancellation kind. Asserts on anything other than
/// parallel/for/sections/taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      // Branch through cleanups so destructors/finalization run before
      // leaving the cancelled construct.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // ThenGen emits the unconditional cancel sequence; below it is either
    // guarded by the 'if' clause condition or emitted directly.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// Emit the outlined function for a target region and note that this module
/// now contains at least one target region before delegating to the helper.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  // Remember that at least one target region was emitted in this module.
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Use a fresh CodeGenFunction so the outlined body does not interact with
  // the current function's state.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // On the device the ID must be the (weak, non-dso-local) function address
    // so the runtime can find and launch the entry point.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // On the host any unique address works; emit a one-byte weak global.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Peel nested compound statements / containers off \p Body and return the
/// single "meaningful" child statement, ignoring trivial expressions, no-op
/// statements and trivial declarations. Returns nullptr if more than one
/// meaningful child is found at any level.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A DeclStmt is ignorable when every declaration in it is a
        // non-variable declaration or a trivially-initialized trivial
        // variable.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target': look for a teams directive that is the single
    // meaningful child of the captured body.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Teams directive without num_teams: let the runtime choose (0).
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // Could not identify a single nested directive - caller decides.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams: the clause (if any) is on the directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct: exactly one team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Derive a thread count from a parallel directive that is the single
/// meaningful child of \p CS's captured statement, honoring that directive's
/// 'if' and 'num_threads' clauses and clamping against
/// \p DefaultThreadLimitVal (when non-null). Returns \p DefaultThreadLimitVal
/// when the child is not a parallel directive, 1 for a simd child, and 0
/// ("runtime decides") when neither a child directive nor a default limit is
/// available.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Pick the 'if' clause that applies to 'parallel' (unmodified or
        // with the parallel name modifier).
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: serialized parallel, one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit the clause's pre-init declarations before evaluating the
            // condition.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads to the default thread limit (unsigned min).
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // A nested directive may carry the thread_limit clause.
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit the clause's pre-init declarations first.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams (non-distribute) directive, descend one more level
      // to look for a distribute region inside it.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Result is min(num_threads, thread_limit) when both are present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
7082 OMP_MAP_RETURN_PARAM = 0x40, 7083 /// This flag signals that the reference being passed is a pointer to 7084 /// private data. 7085 OMP_MAP_PRIVATE = 0x80, 7086 /// Pass the element to the device by value. 7087 OMP_MAP_LITERAL = 0x100, 7088 /// Implicit map 7089 OMP_MAP_IMPLICIT = 0x200, 7090 /// The 16 MSBs of the flags indicate whether the entry is member of some 7091 /// struct/class. 7092 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7093 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7094 }; 7095 7096 /// Class that associates information with a base pointer to be passed to the 7097 /// runtime library. 7098 class BasePointerInfo { 7099 /// The base pointer. 7100 llvm::Value *Ptr = nullptr; 7101 /// The base declaration that refers to this device pointer, or null if 7102 /// there is none. 7103 const ValueDecl *DevPtrDecl = nullptr; 7104 7105 public: 7106 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7107 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7108 llvm::Value *operator*() const { return Ptr; } 7109 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7110 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7111 }; 7112 7113 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7114 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7115 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7116 7117 /// Map between a struct and the its lowest & highest elements which have been 7118 /// mapped. 7119 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7120 /// HE(FieldIndex, Pointer)} 7121 struct StructRangeInfoTy { 7122 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7123 0, Address::invalid()}; 7124 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7125 0, Address::invalid()}; 7126 Address Base = Address::invalid(); 7127 }; 7128 7129 private: 7130 /// Kind that defines how a device pointer has to be returned. 
  struct MapInfo {
    /// Components of the mappable expression, from base to final element.
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    /// Map type carried by the originating clause (to/from/tofrom/...).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    /// Modifiers (e.g. 'always') attached to the originating clause.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    /// True if the runtime must return the device pointer for this entry
    /// (set when the declaration also appears in a use_device_ptr clause).
    bool ReturnDevicePointer = false;
    /// True if this map was generated implicitly rather than written by the
    /// user.
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    /// The use_device_ptr expression for the deferred entry.
    const Expr *IE = nullptr;
    /// The declaration the deferred entry refers to.
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// Directive from where the map clauses were extracted.
  const OMPExecutableDirective &CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
7172 llvm::DenseMap< 7173 const ValueDecl *, 7174 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7175 DevPointersMap; 7176 7177 llvm::Value *getExprTypeSize(const Expr *E) const { 7178 QualType ExprTy = E->getType().getCanonicalType(); 7179 7180 // Reference types are ignored for mapping purposes. 7181 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7182 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7183 7184 // Given that an array section is considered a built-in type, we need to 7185 // do the calculation based on the length of the section instead of relying 7186 // on CGF.getTypeSize(E->getType()). 7187 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7188 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7189 OAE->getBase()->IgnoreParenImpCasts()) 7190 .getCanonicalType(); 7191 7192 // If there is no length associated with the expression, that means we 7193 // are using the whole length of the base. 7194 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 7195 return CGF.getTypeSize(BaseTy); 7196 7197 llvm::Value *ElemSize; 7198 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7199 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7200 } else { 7201 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7202 assert(ATy && "Expecting array type if not a pointer type."); 7203 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7204 } 7205 7206 // If we don't have a length at this point, that is because we have an 7207 // array section with a single element. 7208 if (!OAE->getLength()) 7209 return ElemSize; 7210 7211 llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 7212 LengthVal = 7213 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 7214 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7215 } 7216 return CGF.getTypeSize(ExprTy); 7217 } 7218 7219 /// Return the corresponding bits for a given map clause modifier. 
Add 7220 /// a flag marking the map as a pointer if requested. Add a flag marking the 7221 /// map as the first one of a series of maps that relate to the same map 7222 /// expression. 7223 OpenMPOffloadMappingFlags getMapTypeBits( 7224 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7225 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7226 OpenMPOffloadMappingFlags Bits = 7227 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7228 switch (MapType) { 7229 case OMPC_MAP_alloc: 7230 case OMPC_MAP_release: 7231 // alloc and release is the default behavior in the runtime library, i.e. 7232 // if we don't pass any bits alloc/release that is what the runtime is 7233 // going to do. Therefore, we don't need to signal anything for these two 7234 // type modifiers. 7235 break; 7236 case OMPC_MAP_to: 7237 Bits |= OMP_MAP_TO; 7238 break; 7239 case OMPC_MAP_from: 7240 Bits |= OMP_MAP_FROM; 7241 break; 7242 case OMPC_MAP_tofrom: 7243 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7244 break; 7245 case OMPC_MAP_delete: 7246 Bits |= OMP_MAP_DELETE; 7247 break; 7248 case OMPC_MAP_unknown: 7249 llvm_unreachable("Unexpected map type!"); 7250 } 7251 if (AddPtrFlag) 7252 Bits |= OMP_MAP_PTR_AND_OBJ; 7253 if (AddIsTargetParamFlag) 7254 Bits |= OMP_MAP_TARGET_PARAM; 7255 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7256 != MapModifiers.end()) 7257 Bits |= OMP_MAP_ALWAYS; 7258 return Bits; 7259 } 7260 7261 /// Return true if the provided expression is a final array section. A 7262 /// final array section, is one whose length can't be proved to be one. 7263 bool isFinalArraySectionExpression(const Expr *E) const { 7264 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7265 7266 // It is not an array section and therefore not a unity-size one. 7267 if (!OASE) 7268 return false; 7269 7270 // An array section with no colon always refer to a single element. 
7271 if (OASE->getColonLoc().isInvalid()) 7272 return false; 7273 7274 const Expr *Length = OASE->getLength(); 7275 7276 // If we don't have a length we have to check if the array has size 1 7277 // for this dimension. Also, we should always expect a length if the 7278 // base type is pointer. 7279 if (!Length) { 7280 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7281 OASE->getBase()->IgnoreParenImpCasts()) 7282 .getCanonicalType(); 7283 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7284 return ATy->getSize().getSExtValue() != 1; 7285 // If we don't have a constant dimension length, we have to consider 7286 // the current section as having any size, so it is not necessarily 7287 // unitary. If it happen to be unity size, that's user fault. 7288 return true; 7289 } 7290 7291 // Check if the length evaluates to 1. 7292 Expr::EvalResult Result; 7293 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7294 return true; // Can have more that size 1. 7295 7296 llvm::APSInt ConstLength = Result.Val.getInt(); 7297 return ConstLength.getSExtValue() != 1; 7298 } 7299 7300 /// Generate the base pointers, section pointers, sizes and map type 7301 /// bits for the provided map type, map modifier, and expression components. 7302 /// \a IsFirstComponent should be set to true if the provided set of 7303 /// components is the first associated with a capture. 
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // Array access ('this->x[i]') or array section ('this->x[:n]') rooted
      // at the 'this' pointer.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            // Declare target link (or to + unified shared memory): use the
            // runtime-managed reference for the variable instead.
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      bool IsPointer =
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();

      if (Next == CE || IsPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB =
            CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB = address of the last byte of the struct (LB + size - 1,
          // computed on an i8* view of LB).
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
                // Size of the gap between the current lower bound and the
                // start of the next overlapped element.
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            // Skip past the overlapped element for the next chunk.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing chunk: from the last overlapped element to the
          // end of the struct (HB + 1).
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          // NOTE(review): dyn_cast<FieldDecl> can return null (e.g. if the
          // member decl is not a FieldDecl) and FD is dereferenced without a
          // check below — confirm all MemberExprs reaching here refer to
          // fields.
          const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      // Constant firstprivate captured by reference: map to the device with
      // 'always to'.
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      // Firstprivate pointer: map both the pointer and its pointee.
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      // Generic firstprivate: private copy initialized from the host ('to').
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    // Not firstprivate: default tofrom mapping.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Build the MEMBER_OF flag encoding the 1-based \a Position of the parent
  /// struct entry in the argument list.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << 48);
  }

  /// Replace the MEMBER_OF placeholder (all 16 MSBs set) in \a Flags with the
  /// concrete \a MemberOfFlag, leaving non-placeholder PTR_AND_OBJ entries
  /// untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Flatten the layout of record \a RD (including its non-empty bases, in
  /// LLVM field order) into \a Layout. \a AsBase selects the base-subobject
  /// LLVM type instead of the complete-object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    // One slot per LLVM struct element; each slot is either a base class or
    // a field.
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Keep the first entry claiming this slot.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Emit the flattened layout: recurse into bases, append fields.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses. Keys declarations by their canonical decl (null for 'this').
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7919 for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) 7920 for (const auto &L : C->component_lists()) { 7921 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7922 /*ReturnDevicePointer=*/false, C->isImplicit()); 7923 } 7924 for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>()) 7925 for (const auto &L : C->component_lists()) { 7926 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7927 /*ReturnDevicePointer=*/false, C->isImplicit()); 7928 } 7929 for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>()) 7930 for (const auto &L : C->component_lists()) { 7931 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7932 /*ReturnDevicePointer=*/false, C->isImplicit()); 7933 } 7934 7935 // Look at the use_device_ptr clause information and mark the existing map 7936 // entries as such. If there is no map information for an entry in the 7937 // use_device_ptr list, we create one with map type 'alloc' and zero size 7938 // section. It is the user fault if that was not mapped before. If there is 7939 // no map information and the pointer is a struct member, then we defer the 7940 // emission of that entry until the whole struct has been processed. 7941 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 7942 DeferredInfo; 7943 7944 // FIXME: MSVC 2013 seems to require this-> to find member CurDir. 7945 for (const auto *C : 7946 this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) { 7947 for (const auto &L : C->component_lists()) { 7948 assert(!L.second.empty() && "Not expecting empty list of components!"); 7949 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 7950 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7951 const Expr *IE = L.second.back().getAssociatedExpression(); 7952 // If the first component is a member expression, we have to look into 7953 // 'this', which maps to null in the map of map information. Otherwise 7954 // look directly for the information. 
7955 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 7956 7957 // We potentially have map information for this declaration already. 7958 // Look for the first set of components that refer to it. 7959 if (It != Info.end()) { 7960 auto CI = std::find_if( 7961 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 7962 return MI.Components.back().getAssociatedDeclaration() == VD; 7963 }); 7964 // If we found a map entry, signal that the pointer has to be returned 7965 // and move on to the next declaration. 7966 if (CI != It->second.end()) { 7967 CI->ReturnDevicePointer = true; 7968 continue; 7969 } 7970 } 7971 7972 // We didn't find any match in our map information - generate a zero 7973 // size array section - if the pointer is a struct member we defer this 7974 // action until the whole struct has been processed. 7975 // FIXME: MSVC 2013 seems to require this-> to find member CGF. 7976 if (isa<MemberExpr>(IE)) { 7977 // Insert the pointer into Info to be processed by 7978 // generateInfoForComponentList. Because it is a member pointer 7979 // without a pointee, no entry will be generated for it, therefore 7980 // we need to generate one after the whole struct has been processed. 7981 // Nonetheless, generateInfoForComponentList must be called to take 7982 // the pointer into account for the calculation of the range of the 7983 // partial struct. 
7984 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 7985 /*ReturnDevicePointer=*/false, C->isImplicit()); 7986 DeferredInfo[nullptr].emplace_back(IE, VD); 7987 } else { 7988 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 7989 this->CGF.EmitLValue(IE), IE->getExprLoc()); 7990 BasePointers.emplace_back(Ptr, VD); 7991 Pointers.push_back(Ptr); 7992 Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 7993 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 7994 } 7995 } 7996 } 7997 7998 for (const auto &M : Info) { 7999 // We need to know when we generate information for the first component 8000 // associated with a capture, because the mapping flags depend on it. 8001 bool IsFirstComponentList = true; 8002 8003 // Temporary versions of arrays 8004 MapBaseValuesArrayTy CurBasePointers; 8005 MapValuesArrayTy CurPointers; 8006 MapValuesArrayTy CurSizes; 8007 MapFlagsArrayTy CurTypes; 8008 StructRangeInfoTy PartialStruct; 8009 8010 for (const MapInfo &L : M.second) { 8011 assert(!L.Components.empty() && 8012 "Not expecting declaration with no component lists."); 8013 8014 // Remember the current base pointer index. 8015 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8016 // FIXME: MSVC 2013 seems to require this-> to find the member method. 8017 this->generateInfoForComponentList( 8018 L.MapType, L.MapModifiers, L.Components, CurBasePointers, 8019 CurPointers, CurSizes, CurTypes, PartialStruct, 8020 IsFirstComponentList, L.IsImplicit); 8021 8022 // If this entry relates with a device pointer, set the relevant 8023 // declaration and add the 'return pointer' flag. 
8024 if (L.ReturnDevicePointer) { 8025 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8026 "Unexpected number of mapped base pointers."); 8027 8028 const ValueDecl *RelevantVD = 8029 L.Components.back().getAssociatedDeclaration(); 8030 assert(RelevantVD && 8031 "No relevant declaration related with device pointer??"); 8032 8033 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8034 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8035 } 8036 IsFirstComponentList = false; 8037 } 8038 8039 // Append any pending zero-length pointers which are struct members and 8040 // used with use_device_ptr. 8041 auto CI = DeferredInfo.find(M.first); 8042 if (CI != DeferredInfo.end()) { 8043 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8044 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); 8045 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8046 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8047 CurBasePointers.emplace_back(BasePtr, L.VD); 8048 CurPointers.push_back(Ptr); 8049 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8050 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8051 // value MEMBER_OF=FFFF so that the entry is later updated with the 8052 // correct value of MEMBER_OF. 8053 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8054 OMP_MAP_MEMBER_OF); 8055 } 8056 } 8057 8058 // If there is an entry in PartialStruct it means we have a struct with 8059 // individual members mapped. Emit an extra combined entry. 8060 if (PartialStruct.Base.isValid()) 8061 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8062 PartialStruct); 8063 8064 // We need to append the results of this capture to what we already have. 
8065 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8066 Pointers.append(CurPointers.begin(), CurPointers.end()); 8067 Sizes.append(CurSizes.begin(), CurSizes.end()); 8068 Types.append(CurTypes.begin(), CurTypes.end()); 8069 } 8070 } 8071 8072 /// Emit capture info for lambdas for variables captured by reference. 8073 void generateInfoForLambdaCaptures( 8074 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8075 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8076 MapFlagsArrayTy &Types, 8077 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8078 const auto *RD = VD->getType() 8079 .getCanonicalType() 8080 .getNonReferenceType() 8081 ->getAsCXXRecordDecl(); 8082 if (!RD || !RD->isLambda()) 8083 return; 8084 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8085 LValue VDLVal = CGF.MakeAddrLValue( 8086 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8087 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8088 FieldDecl *ThisCapture = nullptr; 8089 RD->getCaptureFields(Captures, ThisCapture); 8090 if (ThisCapture) { 8091 LValue ThisLVal = 8092 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8093 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8094 LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); 8095 BasePointers.push_back(ThisLVal.getPointer()); 8096 Pointers.push_back(ThisLValVal.getPointer()); 8097 Sizes.push_back( 8098 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8099 CGF.Int64Ty, /*isSigned=*/true)); 8100 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8101 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8102 } 8103 for (const LambdaCapture &LC : RD->captures()) { 8104 if (!LC.capturesVariable()) 8105 continue; 8106 const VarDecl *VD = LC.getCapturedVar(); 8107 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8108 continue; 8109 auto It = 
Captures.find(VD); 8110 assert(It != Captures.end() && "Found lambda capture without field."); 8111 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8112 if (LC.getCaptureKind() == LCK_ByRef) { 8113 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8114 LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); 8115 BasePointers.push_back(VarLVal.getPointer()); 8116 Pointers.push_back(VarLValVal.getPointer()); 8117 Sizes.push_back(CGF.Builder.CreateIntCast( 8118 CGF.getTypeSize( 8119 VD->getType().getCanonicalType().getNonReferenceType()), 8120 CGF.Int64Ty, /*isSigned=*/true)); 8121 } else { 8122 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8123 LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); 8124 BasePointers.push_back(VarLVal.getPointer()); 8125 Pointers.push_back(VarRVal.getScalarVal()); 8126 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8127 } 8128 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8129 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8130 } 8131 } 8132 8133 /// Set correct indices for lambdas captures. 8134 void adjustMemberOfForLambdaCaptures( 8135 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8136 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8137 MapFlagsArrayTy &Types) const { 8138 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8139 // Set correct member_of idx for all implicit lambda captures. 
      // Only entries emitted by generateInfoForLambdaCaptures carry exactly
      // this flag combination; skip everything else.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      // Scan backwards for the entry whose pointer is the lambda object
      // itself; that entry is the parent of this capture.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we generating information for the first component
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // One tuple per component list of a map clause naming this declaration:
    // (components, map type, map-type modifiers, is-implicit).
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Maps the list that covers the larger range ("base") to all lists that
    // map a sub-range of it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      // Compare against every later list only (pairs visited once).
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        // Walk both component lists from the base outward (reverse order)
        // while the components refer to the same expressions/declarations.
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          // The exhausted list is the prefix, i.e. the base; the other one
          // maps a sub-object of it.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      // Order sub-lists by declaration order of the first differing field
      // (using Layout to compare fields of different parents).
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated with the declare target link variables.
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
    for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
      for (const auto &L : C->component_lists()) {
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        // Only variables marked 'declare target link' need an entry here, and
        // only when unified shared memory is not required.
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this' is mapped tofrom with the size of the pointee class.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivate variable: materialize a global copy and map
        // that instead of the captured address.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          // Firstprivate pointer: the mapped pointer is the pointee address
          // loaded through the reference, not the reference itself.
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType =
        Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0);

    // Base pointers and pointers always live in stack temporaries.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType =
          Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
                                   /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Store each base pointer, pointer and (if runtime-evaluated) size into
    // its slot of the corresponding array.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where a use_device_ptr declaration's address was stored so the
      // caller can later read back the translated device pointer.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
8575 static void emitOffloadingArraysArgument( 8576 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8577 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8578 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8579 CodeGenModule &CGM = CGF.CGM; 8580 if (Info.NumberOfPtrs) { 8581 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8582 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8583 Info.BasePointersArray, 8584 /*Idx0=*/0, /*Idx1=*/0); 8585 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8586 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8587 Info.PointersArray, 8588 /*Idx0=*/0, 8589 /*Idx1=*/0); 8590 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8591 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8592 /*Idx0=*/0, /*Idx1=*/0); 8593 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8594 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8595 Info.MapTypesArray, 8596 /*Idx0=*/0, 8597 /*Idx1=*/0); 8598 } else { 8599 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8600 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8601 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8602 MapTypesArrayArg = 8603 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8604 } 8605 } 8606 8607 /// Check for inner distribute directive. 
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // 'target' may wrap a distribute directly, or a 'teams' that in turn
      // wraps a distribute one level deeper.
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // Combined target forms that cannot contain a nested distribute.
      return nullptr;
    // Every other directive kind is not a valid outer directive here; the
    // switch is exhaustive so new kinds are caught at compile time.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
    const llvm::function_ref<llvm::Value *(
        CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  // No distribute loop found: the runtime gets no trip-count hint.
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    llvm::Value *NumIterations = SizeEmitter(CGF, *LD);

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

    // Push the loop trip count to the runtime for the given device.
    llvm::Value *Args[] = {DeviceID, NumIterations};
    CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

// NOTE(review): emitTargetCall continues past the end of this chunk; only its
// head is visible here and it is reproduced unchanged.
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                     const OMPExecutableDirective &D,
                                     llvm::Function *OutlinedFn,
                                     llvm::Value *OutlinedFnID,
                                     const Expr *IfCond, const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask,
                    &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything.
It could be the pointer to the outlined function that 8764 // implements the target region, but we aren't using that so that the 8765 // compiler doesn't need to keep that, and could therefore inline the host 8766 // function if proven worthwhile during optimization. 8767 8768 // From this point on, we need to have an ID of the target region defined. 8769 assert(OutlinedFnID && "Invalid outlined function ID!"); 8770 8771 // Emit device ID if any. 8772 llvm::Value *DeviceID; 8773 if (Device) { 8774 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 8775 CGF.Int64Ty, /*isSigned=*/true); 8776 } else { 8777 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 8778 } 8779 8780 // Emit the number of elements in the offloading arrays. 8781 llvm::Value *PointerNum = 8782 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 8783 8784 // Return value of the runtime offloading call. 8785 llvm::Value *Return; 8786 8787 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 8788 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 8789 8790 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 8791 // The target region is an outlined function launched by the runtime 8792 // via calls __tgt_target() or __tgt_target_teams(). 8793 // 8794 // __tgt_target() launches a target region with one team and one thread, 8795 // executing a serial region. This master thread may in turn launch 8796 // more threads within its team upon encountering a parallel region, 8797 // however, no additional teams can be launched on the device. 8798 // 8799 // __tgt_target_teams() launches a target region with one or more teams, 8800 // each with one or more threads. This call is required for target 8801 // constructs such as: 8802 // 'target teams' 8803 // 'target' / 'teams' 8804 // 'target teams distribute parallel for' 8805 // 'target parallel' 8806 // and so on. 
8807 // 8808 // Note that on the host and CPU targets, the runtime implementation of 8809 // these calls simply call the outlined function without forking threads. 8810 // The outlined functions themselves have runtime calls to 8811 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 8812 // the compiler in emitTeamsCall() and emitParallelCall(). 8813 // 8814 // In contrast, on the NVPTX target, the implementation of 8815 // __tgt_target_teams() launches a GPU kernel with the requested number 8816 // of teams and threads so no additional calls to the runtime are required. 8817 if (NumTeams) { 8818 // If we have NumTeams defined this means that we have an enclosed teams 8819 // region. Therefore we also expect to have NumThreads defined. These two 8820 // values should be defined in the presence of a teams directive, 8821 // regardless of having any clauses associated. If the user is using teams 8822 // but no clauses, these two values will be the default that should be 8823 // passed to the runtime library - a 32-bit integer with the value zero. 8824 assert(NumThreads && "Thread limit expression should be available along " 8825 "with number of teams."); 8826 llvm::Value *OffloadingArgs[] = {DeviceID, 8827 OutlinedFnID, 8828 PointerNum, 8829 InputInfo.BasePointersArray.getPointer(), 8830 InputInfo.PointersArray.getPointer(), 8831 InputInfo.SizesArray.getPointer(), 8832 MapTypesArray, 8833 NumTeams, 8834 NumThreads}; 8835 Return = CGF.EmitRuntimeCall( 8836 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 8837 : OMPRTL__tgt_target_teams), 8838 OffloadingArgs); 8839 } else { 8840 llvm::Value *OffloadingArgs[] = {DeviceID, 8841 OutlinedFnID, 8842 PointerNum, 8843 InputInfo.BasePointersArray.getPointer(), 8844 InputInfo.PointersArray.getPointer(), 8845 InputInfo.SizesArray.getPointer(), 8846 MapTypesArray}; 8847 Return = CGF.EmitRuntimeCall( 8848 createRuntimeFunction(HasNowait ? 
OMPRTL__tgt_target_nowait 8849 : OMPRTL__tgt_target), 8850 OffloadingArgs); 8851 } 8852 8853 // Check the error code and execute the host version if required. 8854 llvm::BasicBlock *OffloadFailedBlock = 8855 CGF.createBasicBlock("omp_offload.failed"); 8856 llvm::BasicBlock *OffloadContBlock = 8857 CGF.createBasicBlock("omp_offload.cont"); 8858 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 8859 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 8860 8861 CGF.EmitBlock(OffloadFailedBlock); 8862 if (RequiresOuterTask) { 8863 CapturedVars.clear(); 8864 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8865 } 8866 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8867 CGF.EmitBranch(OffloadContBlock); 8868 8869 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 8870 }; 8871 8872 // Notify that the host version must be executed. 8873 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 8874 RequiresOuterTask](CodeGenFunction &CGF, 8875 PrePostActionTy &) { 8876 if (RequiresOuterTask) { 8877 CapturedVars.clear(); 8878 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 8879 } 8880 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 8881 }; 8882 8883 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 8884 &CapturedVars, RequiresOuterTask, 8885 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 8886 // Fill up the arrays with all the captured variables. 8887 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8888 MappableExprsHandler::MapValuesArrayTy Pointers; 8889 MappableExprsHandler::MapValuesArrayTy Sizes; 8890 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8891 8892 // Get mappable expression information. 
8893 MappableExprsHandler MEHandler(D, CGF); 8894 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 8895 8896 auto RI = CS.getCapturedRecordDecl()->field_begin(); 8897 auto CV = CapturedVars.begin(); 8898 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 8899 CE = CS.capture_end(); 8900 CI != CE; ++CI, ++RI, ++CV) { 8901 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 8902 MappableExprsHandler::MapValuesArrayTy CurPointers; 8903 MappableExprsHandler::MapValuesArrayTy CurSizes; 8904 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 8905 MappableExprsHandler::StructRangeInfoTy PartialStruct; 8906 8907 // VLA sizes are passed to the outlined region by copy and do not have map 8908 // information associated. 8909 if (CI->capturesVariableArrayType()) { 8910 CurBasePointers.push_back(*CV); 8911 CurPointers.push_back(*CV); 8912 CurSizes.push_back(CGF.Builder.CreateIntCast( 8913 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 8914 // Copy to the device as an argument. No need to retrieve it. 8915 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 8916 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 8917 MappableExprsHandler::OMP_MAP_IMPLICIT); 8918 } else { 8919 // If we have any information in the map clause, we use it, otherwise we 8920 // just do a default mapping. 8921 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 8922 CurSizes, CurMapTypes, PartialStruct); 8923 if (CurBasePointers.empty()) 8924 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 8925 CurPointers, CurSizes, CurMapTypes); 8926 // Generate correct mapping for variables captured by reference in 8927 // lambdas. 8928 if (CI->capturesVariable()) 8929 MEHandler.generateInfoForLambdaCaptures( 8930 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 8931 CurMapTypes, LambdaPointers); 8932 } 8933 // We expect to have at least an element of information for this capture. 
8934 assert(!CurBasePointers.empty() && 8935 "Non-existing map pointer for capture!"); 8936 assert(CurBasePointers.size() == CurPointers.size() && 8937 CurBasePointers.size() == CurSizes.size() && 8938 CurBasePointers.size() == CurMapTypes.size() && 8939 "Inconsistent map information sizes!"); 8940 8941 // If there is an entry in PartialStruct it means we have a struct with 8942 // individual members mapped. Emit an extra combined entry. 8943 if (PartialStruct.Base.isValid()) 8944 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 8945 CurMapTypes, PartialStruct); 8946 8947 // We need to append the results of this capture to what we already have. 8948 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8949 Pointers.append(CurPointers.begin(), CurPointers.end()); 8950 Sizes.append(CurSizes.begin(), CurSizes.end()); 8951 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 8952 } 8953 // Adjust MEMBER_OF flags for the lambdas captures. 8954 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 8955 Pointers, MapTypes); 8956 // Map other list items in the map clause which are not captured variables 8957 // but "declare target link" global variables. 8958 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 8959 MapTypes); 8960 8961 TargetDataInfo Info; 8962 // Fill up the arrays and create the arguments. 
8963 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 8964 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 8965 Info.PointersArray, Info.SizesArray, 8966 Info.MapTypesArray, Info); 8967 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 8968 InputInfo.BasePointersArray = 8969 Address(Info.BasePointersArray, CGM.getPointerAlign()); 8970 InputInfo.PointersArray = 8971 Address(Info.PointersArray, CGM.getPointerAlign()); 8972 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 8973 MapTypesArray = Info.MapTypesArray; 8974 if (RequiresOuterTask) 8975 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 8976 else 8977 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 8978 }; 8979 8980 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 8981 CodeGenFunction &CGF, PrePostActionTy &) { 8982 if (RequiresOuterTask) { 8983 CodeGenFunction::OMPTargetDataInfo InputInfo; 8984 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 8985 } else { 8986 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 8987 } 8988 }; 8989 8990 // If we have a target function ID it means that we need to support 8991 // offloading, otherwise, just execute on the host. We need to execute on host 8992 // regardless of the conditional in the if clause if, e.g., the user do not 8993 // specify target triples. 8994 if (OutlinedFnID) { 8995 if (IfCond) { 8996 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 8997 } else { 8998 RegionCodeGenTy ThenRCG(TargetThenGen); 8999 ThenRCG(CGF); 9000 } 9001 } else { 9002 RegionCodeGenTy ElseRCG(TargetElseGen); 9003 ElseRCG(CGF); 9004 } 9005 } 9006 9007 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9008 StringRef ParentName) { 9009 if (!S) 9010 return; 9011 9012 // Codegen OMP target directives that offload compute to the device. 
9013 bool RequiresDeviceCodegen = 9014 isa<OMPExecutableDirective>(S) && 9015 isOpenMPTargetExecutionDirective( 9016 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9017 9018 if (RequiresDeviceCodegen) { 9019 const auto &E = *cast<OMPExecutableDirective>(S); 9020 unsigned DeviceID; 9021 unsigned FileID; 9022 unsigned Line; 9023 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9024 FileID, Line); 9025 9026 // Is this a target region that should not be emitted as an entry point? If 9027 // so just signal we are done with this target region. 9028 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9029 ParentName, Line)) 9030 return; 9031 9032 switch (E.getDirectiveKind()) { 9033 case OMPD_target: 9034 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9035 cast<OMPTargetDirective>(E)); 9036 break; 9037 case OMPD_target_parallel: 9038 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9039 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9040 break; 9041 case OMPD_target_teams: 9042 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9043 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9044 break; 9045 case OMPD_target_teams_distribute: 9046 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9047 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9048 break; 9049 case OMPD_target_teams_distribute_simd: 9050 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9051 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9052 break; 9053 case OMPD_target_parallel_for: 9054 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9055 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9056 break; 9057 case OMPD_target_parallel_for_simd: 9058 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9059 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9060 break; 9061 case OMPD_target_simd: 9062 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9063 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9064 break; 9065 case OMPD_target_teams_distribute_parallel_for: 9066 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9067 CGM, ParentName, 9068 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9069 break; 9070 case OMPD_target_teams_distribute_parallel_for_simd: 9071 CodeGenFunction:: 9072 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9073 CGM, ParentName, 9074 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9075 break; 9076 case OMPD_parallel: 9077 case OMPD_for: 9078 case OMPD_parallel_for: 9079 case OMPD_parallel_sections: 9080 case OMPD_for_simd: 9081 case OMPD_parallel_for_simd: 9082 case OMPD_cancel: 9083 case OMPD_cancellation_point: 9084 case OMPD_ordered: 9085 case OMPD_threadprivate: 9086 case OMPD_allocate: 9087 case OMPD_task: 9088 case OMPD_simd: 9089 case OMPD_sections: 9090 case OMPD_section: 9091 case OMPD_single: 9092 case OMPD_master: 9093 case OMPD_critical: 9094 case OMPD_taskyield: 9095 case OMPD_barrier: 9096 case OMPD_taskwait: 9097 case OMPD_taskgroup: 9098 case OMPD_atomic: 9099 case OMPD_flush: 9100 case OMPD_teams: 9101 case OMPD_target_data: 9102 case OMPD_target_exit_data: 9103 case OMPD_target_enter_data: 9104 case OMPD_distribute: 9105 case OMPD_distribute_simd: 9106 case OMPD_distribute_parallel_for: 9107 case OMPD_distribute_parallel_for_simd: 9108 case OMPD_teams_distribute: 9109 case OMPD_teams_distribute_simd: 9110 case OMPD_teams_distribute_parallel_for: 9111 case OMPD_teams_distribute_parallel_for_simd: 9112 case OMPD_target_update: 9113 case OMPD_declare_simd: 9114 case OMPD_declare_target: 9115 case OMPD_end_declare_target: 9116 case OMPD_declare_reduction: 9117 case OMPD_declare_mapper: 9118 case OMPD_taskloop: 9119 case OMPD_taskloop_simd: 9120 case OMPD_requires: 9121 case OMPD_unknown: 9122 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 
9123 } 9124 return; 9125 } 9126 9127 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9128 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9129 return; 9130 9131 scanForTargetRegionsFunctions( 9132 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9133 return; 9134 } 9135 9136 // If this is a lambda function, look into its body. 9137 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9138 S = L->getBody(); 9139 9140 // Keep looking for target regions recursively. 9141 for (const Stmt *II : S->children()) 9142 scanForTargetRegionsFunctions(II, ParentName); 9143 } 9144 9145 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9146 // If emitting code for the host, we do not process FD here. Instead we do 9147 // the normal code generation. 9148 if (!CGM.getLangOpts().OpenMPIsDevice) 9149 return false; 9150 9151 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9152 StringRef Name = CGM.getMangledName(GD); 9153 // Try to detect target regions in the function. 9154 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) 9155 scanForTargetRegionsFunctions(FD->getBody(), Name); 9156 9157 // Do not to emit function if it is not marked as declare target. 9158 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9159 AlreadyEmittedTargetFunctions.count(Name) == 0; 9160 } 9161 9162 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9163 if (!CGM.getLangOpts().OpenMPIsDevice) 9164 return false; 9165 9166 // Check if there are Ctors/Dtors in this declaration and look for target 9167 // regions in it. We use the complete variant to produce the kernel name 9168 // mangling. 
9169 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9170 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9171 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9172 StringRef ParentName = 9173 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9174 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9175 } 9176 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9177 StringRef ParentName = 9178 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9179 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9180 } 9181 } 9182 9183 // Do not to emit variable if it is not marked as declare target. 9184 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9185 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9186 cast<VarDecl>(GD.getDecl())); 9187 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9188 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9189 HasRequiresUnifiedSharedMemory)) { 9190 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9191 return true; 9192 } 9193 return false; 9194 } 9195 9196 llvm::Constant * 9197 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9198 const VarDecl *VD) { 9199 assert(VD->getType().isConstant(CGM.getContext()) && 9200 "Expected constant variable."); 9201 StringRef VarName; 9202 llvm::Constant *Addr; 9203 llvm::GlobalValue::LinkageTypes Linkage; 9204 QualType Ty = VD->getType(); 9205 SmallString<128> Buffer; 9206 { 9207 unsigned DeviceID; 9208 unsigned FileID; 9209 unsigned Line; 9210 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9211 FileID, Line); 9212 llvm::raw_svector_ostream OS(Buffer); 9213 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9214 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9215 VarName = OS.str(); 9216 } 9217 Linkage = llvm::GlobalValue::InternalLinkage; 9218 Addr = 9219 
getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9220 getDefaultFirstprivateAddressSpace()); 9221 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9222 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9223 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9224 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9225 VarName, Addr, VarSize, 9226 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9227 return Addr; 9228 } 9229 9230 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9231 llvm::Constant *Addr) { 9232 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9233 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9234 if (!Res) { 9235 if (CGM.getLangOpts().OpenMPIsDevice) { 9236 // Register non-target variables being emitted in device code (debug info 9237 // may cause this). 9238 StringRef VarName = CGM.getMangledName(VD); 9239 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9240 } 9241 return; 9242 } 9243 // Register declare target variables. 9244 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9245 StringRef VarName; 9246 CharUnits VarSize; 9247 llvm::GlobalValue::LinkageTypes Linkage; 9248 9249 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9250 !HasRequiresUnifiedSharedMemory) { 9251 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9252 VarName = CGM.getMangledName(VD); 9253 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9254 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9255 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9256 } else { 9257 VarSize = CharUnits::Zero(); 9258 } 9259 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9260 // Temp solution to prevent optimizations of the internal variables. 
9261 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9262 std::string RefName = getName({VarName, "ref"}); 9263 if (!CGM.GetGlobalValue(RefName)) { 9264 llvm::Constant *AddrRef = 9265 getOrCreateInternalVariable(Addr->getType(), RefName); 9266 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9267 GVAddrRef->setConstant(/*Val=*/true); 9268 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9269 GVAddrRef->setInitializer(Addr); 9270 CGM.addCompilerUsedGlobal(GVAddrRef); 9271 } 9272 } 9273 } else { 9274 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9275 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9276 HasRequiresUnifiedSharedMemory)) && 9277 "Declare target attribute must link or to with unified memory."); 9278 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9279 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9280 else 9281 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9282 9283 if (CGM.getLangOpts().OpenMPIsDevice) { 9284 VarName = Addr->getName(); 9285 Addr = nullptr; 9286 } else { 9287 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9288 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9289 } 9290 VarSize = CGM.getPointerSize(); 9291 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9292 } 9293 9294 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9295 VarName, Addr, VarSize, Flags, Linkage); 9296 } 9297 9298 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9299 if (isa<FunctionDecl>(GD.getDecl()) || 9300 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9301 return emitTargetFunctions(GD); 9302 9303 return emitTargetGlobalVariable(GD); 9304 } 9305 9306 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9307 for (const VarDecl *VD : DeferredGlobalVariables) { 9308 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9309 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9310 if (!Res) 9311 continue; 9312 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9313 !HasRequiresUnifiedSharedMemory) { 9314 CGM.EmitGlobal(VD); 9315 } else { 9316 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9317 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9318 HasRequiresUnifiedSharedMemory)) && 9319 "Expected link clause or to clause with unified memory."); 9320 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9321 } 9322 } 9323 } 9324 9325 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9326 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9327 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9328 " Expected target-based directive."); 9329 } 9330 9331 void CGOpenMPRuntime::checkArchForUnifiedAddressing( 9332 const OMPRequiresDecl *D) { 9333 for (const OMPClause *Clause : D->clauselists()) { 9334 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9335 HasRequiresUnifiedSharedMemory = true; 9336 break; 9337 } 9338 } 9339 } 9340 9341 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9342 LangAS &AS) { 9343 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9344 return false; 9345 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9346 switch(A->getAllocatorType()) { 9347 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9348 // Not supported, fallback to the default mem space. 
9349 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9350 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9351 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9352 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9353 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9354 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9355 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9356 AS = LangAS::Default; 9357 return true; 9358 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9359 llvm_unreachable("Expected predefined allocator for the variables with the " 9360 "static storage."); 9361 } 9362 return false; 9363 } 9364 9365 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9366 return HasRequiresUnifiedSharedMemory; 9367 } 9368 9369 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9370 CodeGenModule &CGM) 9371 : CGM(CGM) { 9372 if (CGM.getLangOpts().OpenMPIsDevice) { 9373 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9374 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9375 } 9376 } 9377 9378 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9379 if (CGM.getLangOpts().OpenMPIsDevice) 9380 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9381 } 9382 9383 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9384 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9385 return true; 9386 9387 StringRef Name = CGM.getMangledName(GD); 9388 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9389 // Do not to emit function if it is marked as declare target as it was already 9390 // emitted. 
9391 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9392 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 9393 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 9394 return !F->isDeclaration(); 9395 return false; 9396 } 9397 return true; 9398 } 9399 9400 return !AlreadyEmittedTargetFunctions.insert(Name).second; 9401 } 9402 9403 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9404 // If we don't have entries or if we are emitting code for the device, we 9405 // don't need to do anything. 9406 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9407 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9408 (OffloadEntriesInfoManager.empty() && 9409 !HasEmittedDeclareTargetRegion && 9410 !HasEmittedTargetRegion)) 9411 return nullptr; 9412 9413 // Create and register the function that handles the requires directives. 9414 ASTContext &C = CGM.getContext(); 9415 9416 llvm::Function *RequiresRegFn; 9417 { 9418 CodeGenFunction CGF(CGM); 9419 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 9420 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 9421 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 9422 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 9423 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 9424 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 9425 // TODO: check for other requires clauses. 9426 // The requires directive takes effect only when a target region is 9427 // present in the compilation unit. Otherwise it is ignored and not 9428 // passed to the runtime. This avoids the runtime from throwing an error 9429 // for mismatching requires clauses across compilation units that don't 9430 // contain at least 1 target region. 
9431 assert((HasEmittedTargetRegion || 9432 HasEmittedDeclareTargetRegion || 9433 !OffloadEntriesInfoManager.empty()) && 9434 "Target or declare target region expected."); 9435 if (HasRequiresUnifiedSharedMemory) 9436 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 9437 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 9438 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 9439 CGF.FinishFunction(); 9440 } 9441 return RequiresRegFn; 9442 } 9443 9444 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 9445 // If we have offloading in the current module, we need to emit the entries 9446 // now and register the offloading descriptor. 9447 createOffloadEntriesAndInfoMetadata(); 9448 9449 // Create and register the offloading binary descriptors. This is the main 9450 // entity that captures all the information about offloading in the current 9451 // compilation unit. 9452 return createOffloadingBinaryDescriptorRegistration(); 9453 } 9454 9455 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9456 const OMPExecutableDirective &D, 9457 SourceLocation Loc, 9458 llvm::Function *OutlinedFn, 9459 ArrayRef<llvm::Value *> CapturedVars) { 9460 if (!CGF.HaveInsertPoint()) 9461 return; 9462 9463 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9464 CodeGenFunction::RunCleanupsScope Scope(CGF); 9465 9466 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9467 llvm::Value *Args[] = { 9468 RTLoc, 9469 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9470 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9471 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9472 RealArgs.append(std::begin(Args), std::end(Args)); 9473 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9474 9475 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9476 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9477 } 9478 9479 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9480 const Expr 
                                             *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  // A missing clause expression is lowered to 0 (presumably meaning
  // "unspecified" to the runtime -- confirm against the kmp runtime API).
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MCHandler(D, CGF);
    MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // All remaining directive kinds cannot reach this code (see the assert at
    // the top of the function); they are enumerated explicitly so the switch
    // stays fully covered and new kinds trigger a compiler warning.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Expose the arrays to ThenGen through the captured-by-reference
    // InputInfo/MapTypesArray.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, TargetThenGen,
                    [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
  // of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function`s "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    // For a C++ method, slot 0 of ParamAttrs describes the implicit 'this';
    // real parameters start at the next slot.
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      // Rule b): first parameter classified as Vector (non-uniform,
      // non-linear).
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  // Rules c) and d): fall back to int for no candidate or aggregate types.
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  // Pairing of the ISA mangling letter with the vector register width in
  // bits, per the x86 vector ABI mangling.
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  // 'N' = not-masked variant, 'M' = masked variant; BS_Undefined emits both.
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  // Emit one "_ZGV<isa><mask><vlen><parameters>_<name>" attribute per
  // (mask, ISA) combination.
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No user simdlen: VLEN = register width / CDT size (both in bits).
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (!!ParamAttr.StrideOrArg)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex within 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  // A pointer that maps to vector contributes the size of its pointee when
  // the pointee is itself pass-by-value.
  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  // Otherwise fall back to the target's pointer-sized integer width.
  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    // NOTE(review): `{}` value-initializes ParamKindTy to its first
    // enumerator (LinearWithVarStride), for which getAArch64MTV always
    // returns false, so this condition can never hold -- confirm that
    // LinearWithVarStride is the intended kind for a return value here.
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return Out.str();
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
10066 template <typename T> 10067 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10068 char ISA, StringRef ParSeq, 10069 StringRef MangledName, bool OutputBecomesInput, 10070 llvm::Function *Fn) { 10071 SmallString<256> Buffer; 10072 llvm::raw_svector_ostream Out(Buffer); 10073 Out << Prefix << ISA << LMask << VLEN; 10074 if (OutputBecomesInput) 10075 Out << "v"; 10076 Out << ParSeq << "_" << MangledName; 10077 Fn->addFnAttr(Out.str()); 10078 } 10079 10080 // Helper function to generate the Advanced SIMD names depending on 10081 // the value of the NDS when simdlen is not present. 10082 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10083 StringRef Prefix, char ISA, 10084 StringRef ParSeq, StringRef MangledName, 10085 bool OutputBecomesInput, 10086 llvm::Function *Fn) { 10087 switch (NDS) { 10088 case 8: 10089 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10090 OutputBecomesInput, Fn); 10091 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10092 OutputBecomesInput, Fn); 10093 break; 10094 case 16: 10095 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10096 OutputBecomesInput, Fn); 10097 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10098 OutputBecomesInput, Fn); 10099 break; 10100 case 32: 10101 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10102 OutputBecomesInput, Fn); 10103 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10104 OutputBecomesInput, Fn); 10105 break; 10106 case 64: 10107 case 128: 10108 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10109 OutputBecomesInput, Fn); 10110 break; 10111 default: 10112 llvm_unreachable("Scalar type is too wide."); 10113 } 10114 } 10115 10116 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
10205 switch (State) { 10206 case OMPDeclareSimdDeclAttr::BS_Undefined: 10207 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10208 OutputBecomesInput, Fn); 10209 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10210 OutputBecomesInput, Fn); 10211 break; 10212 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10213 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10214 OutputBecomesInput, Fn); 10215 break; 10216 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10217 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10218 OutputBecomesInput, Fn); 10219 break; 10220 } 10221 } 10222 } 10223 } 10224 10225 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10226 llvm::Function *Fn) { 10227 ASTContext &C = CGM.getContext(); 10228 FD = FD->getMostRecentDecl(); 10229 // Map params to their positions in function decl. 10230 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10231 if (isa<CXXMethodDecl>(FD)) 10232 ParamPositions.try_emplace(FD, 0); 10233 unsigned ParamPos = ParamPositions.size(); 10234 for (const ParmVarDecl *P : FD->parameters()) { 10235 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10236 ++ParamPos; 10237 } 10238 while (FD) { 10239 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10240 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10241 // Mark uniform parameters. 10242 for (const Expr *E : Attr->uniforms()) { 10243 E = E->IgnoreParenImpCasts(); 10244 unsigned Pos; 10245 if (isa<CXXThisExpr>(E)) { 10246 Pos = ParamPositions[FD]; 10247 } else { 10248 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10249 ->getCanonicalDecl(); 10250 Pos = ParamPositions[PVD]; 10251 } 10252 ParamAttrs[Pos].Kind = Uniform; 10253 } 10254 // Get alignment info. 
10255 auto NI = Attr->alignments_begin(); 10256 for (const Expr *E : Attr->aligneds()) { 10257 E = E->IgnoreParenImpCasts(); 10258 unsigned Pos; 10259 QualType ParmTy; 10260 if (isa<CXXThisExpr>(E)) { 10261 Pos = ParamPositions[FD]; 10262 ParmTy = E->getType(); 10263 } else { 10264 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10265 ->getCanonicalDecl(); 10266 Pos = ParamPositions[PVD]; 10267 ParmTy = PVD->getType(); 10268 } 10269 ParamAttrs[Pos].Alignment = 10270 (*NI) 10271 ? (*NI)->EvaluateKnownConstInt(C) 10272 : llvm::APSInt::getUnsigned( 10273 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10274 .getQuantity()); 10275 ++NI; 10276 } 10277 // Mark linear parameters. 10278 auto SI = Attr->steps_begin(); 10279 auto MI = Attr->modifiers_begin(); 10280 for (const Expr *E : Attr->linears()) { 10281 E = E->IgnoreParenImpCasts(); 10282 unsigned Pos; 10283 if (isa<CXXThisExpr>(E)) { 10284 Pos = ParamPositions[FD]; 10285 } else { 10286 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10287 ->getCanonicalDecl(); 10288 Pos = ParamPositions[PVD]; 10289 } 10290 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10291 ParamAttr.Kind = Linear; 10292 if (*SI) { 10293 Expr::EvalResult Result; 10294 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10295 if (const auto *DRE = 10296 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10297 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10298 ParamAttr.Kind = LinearWithVarStride; 10299 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10300 ParamPositions[StridePVD->getCanonicalDecl()]); 10301 } 10302 } 10303 } else { 10304 ParamAttr.StrideOrArg = Result.Val.getInt(); 10305 } 10306 } 10307 ++SI; 10308 ++MI; 10309 } 10310 llvm::APSInt VLENVal; 10311 SourceLocation ExprLoc; 10312 const Expr *VLENExpr = Attr->getSimdlen(); 10313 if (VLENExpr) { 10314 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10315 ExprLoc = VLENExpr->getExprLoc(); 10316 } 10317 
      // Dispatch to the target-specific 'declare simd' vector-variant
      // emission. Only x86/x86_64 and AArch64 are handled here; other
      // targets silently get no vector variants.
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
          CGM.getTriple().getArch() == llvm::Triple::x86_64) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        // NOTE(review): VLENVal is zero-valued when no 'simdlen' clause was
        // present; presumably the AArch64 emitter derives a default vector
        // length in that case — confirm against its definition.
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        // Emit an SVE ('s') variant and/or an Advanced SIMD ('n') variant,
        // both with a 128-bit vector length, depending on target features.
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    // Walk the redeclaration chain so 'declare simd' attributes attached to
    // earlier declarations of the same function are emitted as well.
    FD = FD->getPreviousDecl();
  }
}

namespace {
/// Cleanup action for doacross support.
///
/// Pushed on the EH scope stack by emitDoacrossInit; calls
/// __kmpc_doacross_fini (the runtime function captured in \p RTLFn) with the
/// two pre-built arguments when the scope is exited normally or via EH.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if the current block is already terminated.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Emit the __kmpc_doacross_init call for an 'ordered(n)' loop nest and
/// schedule the matching __kmpc_doacross_fini via an EH-stack cleanup.
/// \param NumIterations One iteration-count expression per doacross loop
/// dimension.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Lazily build the runtime's dimension descriptor and cache it in
    // KmpDimTy for subsequent doacross loops:
    //   struct kmp_dim {  // loop bounds info, cast to kmp_int64
    //     kmp_int64 lo;   // lower bound
    //     kmp_int64 up;   // upper bound
    //     kmp_int64 st;   // stride
    //   };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per loop dimension, zero-initialized (so 'lo' stays 0).
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data; lower bound is left at its zero-initialized value.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal =
        CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
                                 D.getNumIterations()->getType(), Int64Ty,
                                 D.getNumIterations()->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini as a cleanup so it runs on
  // both normal and exceptional exit from the region.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

/// Emit a doacross 'ordered depend(source)' (__kmpc_doacross_post) or
/// 'ordered depend(sink)' (__kmpc_doacross_wait) call for clause \p C.
/// The per-loop dependence vector is materialized as a kmp_int64 array.
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  // Store each loop's counter value, widened to kmp_int64.
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

/// Emit a call to \p Callee at \p Loc with an artificial debug location.
/// Uses the nounwind call form when the callee is a known-nothrow function,
/// which avoids emitting an invoke/landing pad.
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

/// Emit a call to an OpenMP outlined function; thin forwarder to emitCall so
/// subclasses can override outlined-call emission in one place.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

/// Record that a 'declare target' function body is being emitted, so later
/// phases know at least one declare-target region exists in this module.
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

/// Default host mapping of a native parameter: the target parameter shares
/// the native parameter's local storage (no translation needed).
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

namespace {
/// Cleanup action for allocate support.
/// Pushed on the EH scope stack by getAddressOfLocalVariable; invokes
/// __kmpc_free (the runtime function captured in \p RTLFn) with the three
/// pre-built arguments (thread id, address, allocator) on scope exit.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    // Nothing to emit if the current block is already terminated.
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

/// Return the address of a local variable declared with an OpenMP 'allocate'
/// directive, allocating it via __kmpc_alloc and scheduling the matching
/// __kmpc_free cleanup. Returns Address::invalid() when the variable has no
/// OMPAllocateDeclAttr (or uses the default allocator with no allocator
/// expression), meaning normal stack allocation should be used instead.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA: size is only known at runtime.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    // Fixed-size type: compute the aligned size at compile time.
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // void *addr = __kmpc_alloc(gtid, size, allocator);
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          CVD->getName() + ".void.addr");
  // Pair the allocation with __kmpc_free on normal and EH exit.
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* to a pointer to the variable's own type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      CVD->getName() + ".addr");
  return Address(Addr, Align);
}

// CGOpenMPSIMDRuntime: in SIMD-only mode ('simd' constructs only) no libomp
// runtime calls may be generated, so every entry point that would require the
// runtime is a hard error.

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// CGOpenMPSIMDRuntime (continued): loop scheduling, threadprivate, tasking,
// target offloading and doacross entry points all require the libomp runtime
// and are therefore fatal in SIMD-only mode. The few exceptions that have
// real behavior are documented individually below.

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             OpenMPProcBindClauseKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Reductions ARE supported in SIMD-only mode, but only the "simple"
/// (runtime-free) form; delegate to the base-class emission for it.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &D,
                                         llvm::Function *OutlinedFn,
                                         llvm::Value *OutlinedFnID,
                                         const Expr *IfCond,
                                         const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

/// Nothing is ever offloaded in SIMD-only mode, so no global needs
/// target-specific emission.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

/// No offload entries exist, hence no registration function is needed.
llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() {
  return nullptr;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}