//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Stores the region kind, the code generation callback, the directive kind
/// and the cancel flag supplied at construction and exposes them through the
/// accessors below. Every instance passes CR_OpenMP to the CGCapturedStmtInfo
/// base, which is exactly what classof() tests for.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Construct region info bound to the captured statement \p CS.
  /// \param CS Captured statement forwarded to the CGCapturedStmtInfo base.
  /// \param RegionKind Kind later reported by getRegionKind().
  /// \param CodeGen Code generation callback stored for this region.
  /// \param Kind Directive kind later reported by getDirectiveKind().
  /// \param HasCancel Flag later reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Construct region info without a captured statement; only the CR_OpenMP
  /// kind is forwarded to the base class. The remaining parameters have the
  /// same meaning as in the constructor above.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting an untied-task switching point. The base implementation
  /// intentionally does nothing; it is virtual so derived region infos can
  /// provide their own behavior.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// \return The region kind supplied at construction.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \return The directive kind supplied at construction.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \return The cancel flag supplied at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: matches any captured statement info whose kind is
  /// CR_OpenMP.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region; used by the classof() implementations of derived
  /// classes to tell the region flavors apart.
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback supplied at construction.
  RegionCodeGenTy CodeGen;
  /// Directive kind supplied at construction.
  OpenMPDirectiveKind Kind;
  /// Cancel flag supplied at construction.
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 
146 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 llvm::Value *Res = 150 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 151 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 153 CGF.EmitBlock(DoneBB); 154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 156 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 157 CGF.Builder.GetInsertBlock()); 158 emitUntiedSwitch(CGF); 159 } 160 } 161 void emitUntiedSwitch(CodeGenFunction &CGF) const { 162 if (Untied) { 163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 167 PartIdLVal); 168 UntiedCodeGen(CGF); 169 CodeGenFunction::JumpDest CurPoint = 170 CGF.getJumpDestInCurrentScope(".untied.next."); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 CGF.Builder.GetInsertBlock()); 175 CGF.EmitBranchThroughCleanup(CurPoint); 176 CGF.EmitBlock(CurPoint.getBlock()); 177 } 178 } 179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 180 }; 181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 182 const VarDecl *ThreadIDVar, 183 const RegionCodeGenTy &CodeGen, 184 OpenMPDirectiveKind Kind, bool HasCancel, 185 const UntiedTaskActionTy &Action) 186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 187 ThreadIDVar(ThreadIDVar), Action(Action) { 188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 189 } 190 191 /// Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 
193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 194 195 /// Get an LValue for the current ThreadID variable. 196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 197 198 /// Get the name of the capture helper. 199 StringRef getHelperName() const override { return ".omp_outlined."; } 200 201 void emitUntiedSwitch(CodeGenFunction &CGF) override { 202 Action.emitUntiedSwitch(CGF); 203 } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 208 TaskOutlinedRegion; 209 } 210 211 private: 212 /// A variable or parameter storing global thread id for OpenMP 213 /// constructs. 214 const VarDecl *ThreadIDVar; 215 /// Action for emitting code for untied tasks. 216 const UntiedTaskActionTy &Action; 217 }; 218 219 /// API for inlined captured statement code generation in OpenMP 220 /// constructs. 221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 222 public: 223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 224 const RegionCodeGenTy &CodeGen, 225 OpenMPDirectiveKind Kind, bool HasCancel) 226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 227 OldCSI(OldCSI), 228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 229 230 // Retrieve the value of the context parameter. 231 llvm::Value *getContextValue() const override { 232 if (OuterRegionInfo) 233 return OuterRegionInfo->getContextValue(); 234 llvm_unreachable("No context value for inlined OpenMP region"); 235 } 236 237 void setContextValue(llvm::Value *V) override { 238 if (OuterRegionInfo) { 239 OuterRegionInfo->setContextValue(V); 240 return; 241 } 242 llvm_unreachable("No context value for inlined OpenMP region"); 243 } 244 245 /// Lookup the captured field decl for a variable. 
246 const FieldDecl *lookup(const VarDecl *VD) const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->lookup(VD); 249 // If there is no outer outlined region,no need to lookup in a list of 250 // captured variables, we can use the original one. 251 return nullptr; 252 } 253 254 FieldDecl *getThisFieldDecl() const override { 255 if (OuterRegionInfo) 256 return OuterRegionInfo->getThisFieldDecl(); 257 return nullptr; 258 } 259 260 /// Get a variable or parameter for storing global thread id 261 /// inside OpenMP construct. 262 const VarDecl *getThreadIDVariable() const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->getThreadIDVariable(); 265 return nullptr; 266 } 267 268 /// Get an LValue for the current ThreadID variable. 269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 272 llvm_unreachable("No LValue for inlined OpenMP construct"); 273 } 274 275 /// Get the name of the capture helper. 276 StringRef getHelperName() const override { 277 if (auto *OuterRegionInfo = getOldCSI()) 278 return OuterRegionInfo->getHelperName(); 279 llvm_unreachable("No helper name for inlined OpenMP construct"); 280 } 281 282 void emitUntiedSwitch(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 OuterRegionInfo->emitUntiedSwitch(CGF); 285 } 286 287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 288 289 static bool classof(const CGCapturedStmtInfo *Info) { 290 return CGOpenMPRegionInfo::classof(Info) && 291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 292 } 293 294 ~CGOpenMPInlinedRegionInfo() override = default; 295 296 private: 297 /// CodeGen info about outer OpenMP region. 298 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 299 CGOpenMPRegionInfo *OuterRegionInfo; 300 }; 301 302 /// API for captured statement code generation in OpenMP target 303 /// constructs. 
For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 
346 for (const auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate( 359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 360 } 361 (void)PrivScope.Privatize(); 362 } 363 364 /// Lookup the captured field decl for a variable. 365 const FieldDecl *lookup(const VarDecl *VD) const override { 366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 367 return FD; 368 return nullptr; 369 } 370 371 /// Emit the captured statement body. 372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 373 llvm_unreachable("No body for expressions"); 374 } 375 376 /// Get a variable or parameter for storing global thread id 377 /// inside OpenMP construct. 378 const VarDecl *getThreadIDVariable() const override { 379 llvm_unreachable("No thread id for expressions"); 380 } 381 382 /// Get the name of the capture helper. 383 StringRef getHelperName() const override { 384 llvm_unreachable("No helper name for expressions"); 385 } 386 387 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 388 389 private: 390 /// Private scope to capture global variables. 391 CodeGenFunction::OMPPrivateScope PrivScope; 392 }; 393 394 /// RAII for emitting code of OpenMP constructs. 395 class InlinedOpenMPRegionRAII { 396 CodeGenFunction &CGF; 397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 398 FieldDecl *LambdaThisCaptureField = nullptr; 399 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 400 401 public: 402 /// Constructs region for combined constructs. 403 /// \param CodeGen Code generation sequence for combined directives. 
Includes 404 /// a list of functions used for code generation of implicitly inlined 405 /// regions. 406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 407 OpenMPDirectiveKind Kind, bool HasCancel) 408 : CGF(CGF) { 409 // Start emission for the construct. 410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 414 CGF.LambdaThisCaptureField = nullptr; 415 BlockInfo = CGF.BlockInfo; 416 CGF.BlockInfo = nullptr; 417 } 418 419 ~InlinedOpenMPRegionRAII() { 420 // Restore original CapturedStmtInfo only if we're done with code emission. 421 auto *OldCSI = 422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 423 delete CGF.CapturedStmtInfo; 424 CGF.CapturedStmtInfo = OldCSI; 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 427 CGF.BlockInfo = BlockInfo; 428 } 429 }; 430 431 /// Values for bit flags used in the ident_t to describe the fields. 432 /// All enumeric elements are named and described in accordance with the code 433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 434 enum OpenMPLocationFlags : unsigned { 435 /// Use trampoline for internal microtask. 436 OMP_IDENT_IMD = 0x01, 437 /// Use c-style ident structure. 438 OMP_IDENT_KMPC = 0x02, 439 /// Atomic reduction option for kmpc_reduce. 440 OMP_ATOMIC_REDUCE = 0x10, 441 /// Explicit 'barrier' directive. 442 OMP_IDENT_BARRIER_EXPL = 0x20, 443 /// Implicit barrier in code. 444 OMP_IDENT_BARRIER_IMPL = 0x40, 445 /// Implicit barrier in 'for' directive. 446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 447 /// Implicit barrier in 'sections' directive. 448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 449 /// Implicit barrier in 'single' directive. 
450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 451 /// Call of __kmp_for_static_init for static loop. 452 OMP_IDENT_WORK_LOOP = 0x200, 453 /// Call of __kmp_for_static_init for sections. 454 OMP_IDENT_WORK_SECTIONS = 0x400, 455 /// Call of __kmp_for_static_init for distribute. 456 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 458 }; 459 460 namespace { 461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 462 /// Values for bit flags for marking which requires clauses have been used. 463 enum OpenMPOffloadingRequiresDirFlags : int64_t { 464 /// flag undefined. 465 OMP_REQ_UNDEFINED = 0x000, 466 /// no requires clause present. 467 OMP_REQ_NONE = 0x001, 468 /// reverse_offload clause. 469 OMP_REQ_REVERSE_OFFLOAD = 0x002, 470 /// unified_address clause. 471 OMP_REQ_UNIFIED_ADDRESS = 0x004, 472 /// unified_shared_memory clause. 473 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 474 /// dynamic_allocators clause. 475 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 476 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 477 }; 478 479 enum OpenMPOffloadingReservedDeviceIDs { 480 /// Device ID if the device was not defined, runtime should get it 481 /// from environment variables in the spec. 482 OMP_DEVICEID_UNDEF = -1, 483 }; 484 } // anonymous namespace 485 486 /// Describes ident structure that describes a source location. 
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below are listed in the same order as the fields of the
/// structure quoted above, so each enumerator's value is its field's position.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// The two modifier enumerators at the end are single high bits (bit 29 and
/// bit 30) that do not overlap any of the schedule values listed before them.
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule; an alias for the unchunked static schedule.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the runtime library entry points referred to in this file.
/// The comment on each enumerator quotes the signature of the corresponding
/// runtime function.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  /// kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  /// size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  /// kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  /// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,
  /// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  /// *data);
  OMPRTL__kmpc_task_reduction_init,
  /// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  /// *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  /// Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  /// Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  /// Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  /// size);
  OMPRTL__kmpc_push_target_tripcount,
  /// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_nowait,
  /// Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  /// *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  /// Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  /// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  /// Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  /// NOTE(review): every sibling __tgt_* signature here spells arg_sizes as
  /// int64_t *; the size_t * above looks stale - confirm against the runtime.
  OMPRTL__tgt_target_data_end,
  /// Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  /// Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  /// Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  /// Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  /// Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  /// *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
764 class CleanupTy final : public EHScopeStack::Cleanup { 765 PrePostActionTy *Action; 766 767 public: 768 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 769 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 770 if (!CGF.HaveInsertPoint()) 771 return; 772 Action->Exit(CGF); 773 } 774 }; 775 776 } // anonymous namespace 777 778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 779 CodeGenFunction::RunCleanupsScope Scope(CGF); 780 if (PrePostAction) { 781 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 782 Callback(CodeGen, CGF, *PrePostAction); 783 } else { 784 PrePostActionTy Action; 785 Callback(CodeGen, CGF, Action); 786 } 787 } 788 789 /// Check if the combiner is a call to UDR combiner and if it is so return the 790 /// UDR decl used for reduction. 791 static const OMPDeclareReductionDecl * 792 getReductionInit(const Expr *ReductionOp) { 793 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 794 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 795 if (const auto *DRE = 796 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 797 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 798 return DRD; 799 return nullptr; 800 } 801 802 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 803 const OMPDeclareReductionDecl *DRD, 804 const Expr *InitOp, 805 Address Private, Address Original, 806 QualType Ty) { 807 if (DRD->getInitializer()) { 808 std::pair<llvm::Function *, llvm::Function *> Reduction = 809 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 810 const auto *CE = cast<CallExpr>(InitOp); 811 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 812 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 813 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 814 const auto *LHSDRE = 815 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 816 const auto *RHSDRE = 817 
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    // Remap the variable behind the first call argument to the private copy
    // and the one behind the second argument to the original item.
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    // Bind the opaque callee to the second function of the pair returned by
    // getUserDefinedReduction() and emit the call for its side effects.
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No initializer: materialize a private constant global holding the null
    // value of Ty and copy it into the private variable.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Form the r-value according to how values of Ty are evaluated.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress());
      break;
    }
    // Route the value through a temporary opaque expression so that the
    // generic "emit expression to memory" path can store it into Private.
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit element-by-element initialization of an array.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, every element is initialized with
/// emitInitWithReductionInitializer; otherwise \p Init is emitted into it.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, or null. When non-null, the
/// source array is advanced in lock-step with the destination array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is only touched when a declare reduction is involved.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop: an emptiness check up front,
  // then a body block whose exit test sits at the bottom.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element across
  // iterations; the first incoming value is the array begin from EntryBB.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Cleanups created while initializing one element are run per element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the source GEP reuses the "omp.arraycpy.dest.element"
    // value name; presumably a copy-paste leftover that only affects the IR
    // value name - confirm before renaming.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from whatever block the element init ended in.
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit the lvalue of the shared reduction item \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

/// Emit the upper-bound lvalue of \p E when it is an array section
/// (EmitOMPArraySectionExpr with IsLowerBound=false); for any other
/// expression a default-constructed LValue is returned.
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

/// Initialize the array-typed private copy of reduction item \p N element by
/// element, using the declare-reduction initializer when \p DRD provides one
/// (or when the private variable has no initializer of its own).
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ?
ClausesData[N].ReductionOp 967 : PrivateVD->getInit(), 968 DRD, SharedLVal.getAddress()); 969 } 970 971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 972 ArrayRef<const Expr *> Privates, 973 ArrayRef<const Expr *> ReductionOps) { 974 ClausesData.reserve(Shareds.size()); 975 SharedAddresses.reserve(Shareds.size()); 976 Sizes.reserve(Shareds.size()); 977 BaseDecls.reserve(Shareds.size()); 978 auto IPriv = Privates.begin(); 979 auto IRed = ReductionOps.begin(); 980 for (const Expr *Ref : Shareds) { 981 ClausesData.emplace_back(Ref, *IPriv, *IRed); 982 std::advance(IPriv, 1); 983 std::advance(IRed, 1); 984 } 985 } 986 987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 988 assert(SharedAddresses.size() == N && 989 "Number of generated lvalues must be exactly N."); 990 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 991 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 992 SharedAddresses.emplace_back(First, Second); 993 } 994 995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 996 const auto *PrivateVD = 997 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 998 QualType PrivateType = PrivateVD->getType(); 999 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1000 if (!PrivateType->isVariablyModifiedType()) { 1001 Sizes.emplace_back( 1002 CGF.getTypeSize( 1003 SharedAddresses[N].first.getType().getNonReferenceType()), 1004 nullptr); 1005 return; 1006 } 1007 llvm::Value *Size; 1008 llvm::Value *SizeInChars; 1009 auto *ElemType = 1010 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 1011 ->getElementType(); 1012 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1013 if (AsArraySection) { 1014 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 1015 SharedAddresses[N].first.getPointer()); 1016 Size = CGF.Builder.CreateNUWAdd( 1017 Size, llvm::ConstantInt::get(Size->getType(), 
/*V=*/1)); 1018 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1019 } else { 1020 SizeInChars = CGF.getTypeSize( 1021 SharedAddresses[N].first.getType().getNonReferenceType()); 1022 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1023 } 1024 Sizes.emplace_back(SizeInChars, Size); 1025 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1026 CGF, 1027 cast<OpaqueValueExpr>( 1028 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1029 RValue::get(Size)); 1030 CGF.EmitVariablyModifiedType(PrivateType); 1031 } 1032 1033 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1034 llvm::Value *Size) { 1035 const auto *PrivateVD = 1036 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1037 QualType PrivateType = PrivateVD->getType(); 1038 if (!PrivateType->isVariablyModifiedType()) { 1039 assert(!Size && !Sizes[N].second && 1040 "Size should be nullptr for non-variably modified reduction " 1041 "items."); 1042 return; 1043 } 1044 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1045 CGF, 1046 cast<OpaqueValueExpr>( 1047 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1048 RValue::get(Size)); 1049 CGF.EmitVariablyModifiedType(PrivateType); 1050 } 1051 1052 void ReductionCodeGen::emitInitialization( 1053 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1054 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1055 assert(SharedAddresses.size() > N && "No variable was generated"); 1056 const auto *PrivateVD = 1057 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1058 const OMPDeclareReductionDecl *DRD = 1059 getReductionInit(ClausesData[N].ReductionOp); 1060 QualType PrivateType = PrivateVD->getType(); 1061 PrivateAddr = CGF.Builder.CreateElementBitCast( 1062 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1063 QualType SharedType = SharedAddresses[N].first.getType(); 1064 SharedLVal = CGF.MakeAddrLValue( 1065 
CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1066 CGF.ConvertTypeForMem(SharedType)), 1067 SharedType, SharedAddresses[N].first.getBaseInfo(), 1068 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1069 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1070 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1071 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1072 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1073 PrivateAddr, SharedLVal.getAddress(), 1074 SharedLVal.getType()); 1075 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1076 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1077 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1078 PrivateVD->getType().getQualifiers(), 1079 /*IsInitializer=*/false); 1080 } 1081 } 1082 1083 bool ReductionCodeGen::needCleanups(unsigned N) { 1084 const auto *PrivateVD = 1085 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1086 QualType PrivateType = PrivateVD->getType(); 1087 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1088 return DTorKind != QualType::DK_none; 1089 } 1090 1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1092 Address PrivateAddr) { 1093 const auto *PrivateVD = 1094 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1095 QualType PrivateType = PrivateVD->getType(); 1096 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1097 if (needCleanups(N)) { 1098 PrivateAddr = CGF.Builder.CreateElementBitCast( 1099 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1100 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1101 } 1102 } 1103 1104 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1105 LValue BaseLV) { 1106 BaseTy = BaseTy.getNonReferenceType(); 1107 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1108 
!CGF.getContext().hasSameType(BaseTy, ElTy)) { 1109 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1110 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1111 } else { 1112 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1113 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1114 } 1115 BaseTy = BaseTy->getPointeeType(); 1116 } 1117 return CGF.MakeAddrLValue( 1118 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1119 CGF.ConvertTypeForMem(ElTy)), 1120 BaseLV.getType(), BaseLV.getBaseInfo(), 1121 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1122 } 1123 1124 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1125 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1126 llvm::Value *Addr) { 1127 Address Tmp = Address::invalid(); 1128 Address TopTmp = Address::invalid(); 1129 Address MostTopTmp = Address::invalid(); 1130 BaseTy = BaseTy.getNonReferenceType(); 1131 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1132 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1133 Tmp = CGF.CreateMemTemp(BaseTy); 1134 if (TopTmp.isValid()) 1135 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1136 else 1137 MostTopTmp = Tmp; 1138 TopTmp = Tmp; 1139 BaseTy = BaseTy->getPointeeType(); 1140 } 1141 llvm::Type *Ty = BaseLVType; 1142 if (Tmp.isValid()) 1143 Ty = Tmp.getElementType(); 1144 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1145 if (Tmp.isValid()) { 1146 CGF.Builder.CreateStore(Addr, Tmp); 1147 return MostTopTmp; 1148 } 1149 return Address(Addr, BaseLVAlignment); 1150 } 1151 1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1153 const VarDecl *OrigVD = nullptr; 1154 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1155 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1156 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1157 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1158 
while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1159 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1160 DE = cast<DeclRefExpr>(Base); 1161 OrigVD = cast<VarDecl>(DE->getDecl()); 1162 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1163 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1164 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1165 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1166 DE = cast<DeclRefExpr>(Base); 1167 OrigVD = cast<VarDecl>(DE->getDecl()); 1168 } 1169 return OrigVD; 1170 } 1171 1172 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1173 Address PrivateAddr) { 1174 const DeclRefExpr *DE; 1175 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1176 BaseDecls.emplace_back(OrigVD); 1177 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1178 LValue BaseLValue = 1179 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1180 OriginalBaseLValue); 1181 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1182 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1183 llvm::Value *PrivatePointer = 1184 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1185 PrivateAddr.getPointer(), 1186 SharedAddresses[N].first.getAddress().getType()); 1187 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1188 return castToBase(CGF, OrigVD->getType(), 1189 SharedAddresses[N].first.getType(), 1190 OriginalBaseLValue.getAddress().getType(), 1191 OriginalBaseLValue.getAlignment(), Ptr); 1192 } 1193 BaseDecls.emplace_back( 1194 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1195 return PrivateAddr; 1196 } 1197 1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1199 const OMPDeclareReductionDecl *DRD = 1200 getReductionInit(ClausesData[N].ReductionOp); 1201 return DRD && DRD->getInitializer(); 1202 } 1203 1204 LValue 
CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1205 return CGF.EmitLoadOfPointerLValue( 1206 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1207 getThreadIDVariable()->getType()->castAs<PointerType>()); 1208 } 1209 1210 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1211 if (!CGF.HaveInsertPoint()) 1212 return; 1213 // 1.2.2 OpenMP Language Terminology 1214 // Structured block - An executable statement with a single entry at the 1215 // top and a single exit at the bottom. 1216 // The point of exit cannot be a branch out of the structured block. 1217 // longjmp() and throw() must not violate the entry/exit criteria. 1218 CGF.EHStack.pushTerminate(); 1219 CodeGen(CGF); 1220 CGF.EHStack.popTerminate(); 1221 } 1222 1223 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1224 CodeGenFunction &CGF) { 1225 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1226 getThreadIDVariable()->getType(), 1227 AlignmentSource::Decl); 1228 } 1229 1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1231 QualType FieldTy) { 1232 auto *Field = FieldDecl::Create( 1233 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1234 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1235 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1236 Field->setAccess(AS_public); 1237 DC->addDecl(Field); 1238 return Field; 1239 } 1240 1241 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1242 StringRef Separator) 1243 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1244 OffloadEntriesInfoManager(CGM) { 1245 ASTContext &C = CGM.getContext(); 1246 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1247 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1248 RD->startDefinition(); 1249 // reserved_1 1250 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1251 // flags 1252 addFieldToRecordDecl(C, RD, 
KmpInt32Ty); 1253 // reserved_2 1254 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1255 // reserved_3 1256 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1257 // psource 1258 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1259 RD->completeDefinition(); 1260 IdentQTy = C.getRecordType(RD); 1261 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1262 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1263 1264 loadOffloadInfoMetadata(); 1265 } 1266 1267 void CGOpenMPRuntime::clear() { 1268 InternalVars.clear(); 1269 // Clean non-target variable declarations possibly used only in debug info. 1270 for (const auto &Data : EmittedNonTargetVariables) { 1271 if (!Data.getValue().pointsToAliveValue()) 1272 continue; 1273 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1274 if (!GV) 1275 continue; 1276 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1277 continue; 1278 GV->eraseFromParent(); 1279 } 1280 } 1281 1282 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1283 SmallString<128> Buffer; 1284 llvm::raw_svector_ostream OS(Buffer); 1285 StringRef Sep = FirstSeparator; 1286 for (StringRef Part : Parts) { 1287 OS << Sep << Part; 1288 Sep = Separator; 1289 } 1290 return OS.str(); 1291 } 1292 1293 static llvm::Function * 1294 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1295 const Expr *CombinerInitializer, const VarDecl *In, 1296 const VarDecl *Out, bool IsCombiner) { 1297 // void .omp_combiner.(Ty *in, Ty *out); 1298 ASTContext &C = CGM.getContext(); 1299 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1300 FunctionArgList Args; 1301 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1302 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1303 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1304 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1305 Args.push_back(&OmpOutParm); 1306 Args.push_back(&OmpInParm); 1307 const CGFunctionInfo &FnInfo = 1308 
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The helper is named from "omp_combiner" or "omp_initializer" plus an
  // empty trailing part, joined with the runtime's separators.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // When optimizing, request that the helper always be inlined.
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress();
  });
  (void)Scope.Privatize();
  // For an initializer helper, first emit the non-trivial initializer of the
  // 'Out' variable itself (if it has one).
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  // Then emit the combiner/initializer expression, if one was supplied.
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner (and, if the declaration has one, the initializer)
/// helper for declare reduction \p D and cache the pair in UDRMap. Does
/// nothing when \p D is already cached. When \p CGF is non-null, \p D is
/// also recorded against the function currently being emitted.
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
return; 1354 llvm::Function *Combiner = emitCombinerOrInitializer( 1355 CGM, D->getType(), D->getCombiner(), 1356 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1357 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1358 /*IsCombiner=*/true); 1359 llvm::Function *Initializer = nullptr; 1360 if (const Expr *Init = D->getInitializer()) { 1361 Initializer = emitCombinerOrInitializer( 1362 CGM, D->getType(), 1363 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1364 : nullptr, 1365 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1366 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1367 /*IsCombiner=*/false); 1368 } 1369 UDRMap.try_emplace(D, Combiner, Initializer); 1370 if (CGF) { 1371 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1372 Decls.second.push_back(D); 1373 } 1374 } 1375 1376 std::pair<llvm::Function *, llvm::Function *> 1377 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1378 auto I = UDRMap.find(D); 1379 if (I != UDRMap.end()) 1380 return I->second; 1381 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1382 return UDRMap.lookup(D); 1383 } 1384 1385 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1386 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1387 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1388 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1389 assert(ThreadIDVar->getType()->isPointerType() && 1390 "thread id variable must be of type kmp_int32 *"); 1391 CodeGenFunction CGF(CGM, true); 1392 bool HasCancel = false; 1393 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1394 HasCancel = OPD->hasCancel(); 1395 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1396 HasCancel = OPSD->hasCancel(); 1397 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1398 HasCancel = OPFD->hasCancel(); 1399 else if (const 
auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1400 HasCancel = OPFD->hasCancel(); 1401 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1402 HasCancel = OPFD->hasCancel(); 1403 else if (const auto *OPFD = 1404 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1405 HasCancel = OPFD->hasCancel(); 1406 else if (const auto *OPFD = 1407 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1408 HasCancel = OPFD->hasCancel(); 1409 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1410 HasCancel, OutlinedHelperName); 1411 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1412 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1413 } 1414 1415 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1416 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1417 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1418 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1419 return emitParallelOrTeamsOutlinedFunction( 1420 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1421 } 1422 1423 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1424 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1425 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1426 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1427 return emitParallelOrTeamsOutlinedFunction( 1428 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1429 } 1430 1431 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1432 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1433 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1434 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1435 bool Tied, unsigned &NumberOfParts) { 1436 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1437 PrePostActionTy &) { 1438 llvm::Value *ThreadID = getThreadID(CGF, 
D.getBeginLoc()); 1439 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1440 llvm::Value *TaskArgs[] = { 1441 UpLoc, ThreadID, 1442 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1443 TaskTVar->getType()->castAs<PointerType>()) 1444 .getPointer()}; 1445 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1446 }; 1447 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1448 UntiedCodeGen); 1449 CodeGen.setAction(Action); 1450 assert(!ThreadIDVar->getType()->isPointerType() && 1451 "thread id variable must be of type kmp_int32 for tasks"); 1452 const OpenMPDirectiveKind Region = 1453 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1454 : OMPD_task; 1455 const CapturedStmt *CS = D.getCapturedStmt(Region); 1456 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1457 CodeGenFunction CGF(CGM, true); 1458 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1459 InnermostKind, 1460 TD ? TD->hasCancel() : false, Action); 1461 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1462 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1463 if (!Tied) 1464 NumberOfParts = Action.getNumberOfParts(); 1465 return Res; 1466 } 1467 1468 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1469 const RecordDecl *RD, const CGRecordLayout &RL, 1470 ArrayRef<llvm::Constant *> Data) { 1471 llvm::StructType *StructTy = RL.getLLVMType(); 1472 unsigned PrevIdx = 0; 1473 ConstantInitBuilder CIBuilder(CGM); 1474 auto DI = Data.begin(); 1475 for (const FieldDecl *FD : RD->fields()) { 1476 unsigned Idx = RL.getLLVMFieldNo(FD); 1477 // Fill the alignment. 1478 for (unsigned I = PrevIdx; I < Idx; ++I) 1479 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1480 PrevIdx = Idx + 1; 1481 Fields.add(*DI); 1482 ++DI; 1483 } 1484 } 1485 1486 template <class... 
As> 1487 static llvm::GlobalVariable * 1488 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1489 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1490 As &&... Args) { 1491 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1492 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1493 ConstantInitBuilder CIBuilder(CGM); 1494 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1495 buildStructValue(Fields, CGM, RD, RL, Data); 1496 return Fields.finishAndCreateGlobal( 1497 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1498 std::forward<As>(Args)...); 1499 } 1500 1501 template <typename T> 1502 static void 1503 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1504 ArrayRef<llvm::Constant *> Data, 1505 T &Parent) { 1506 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1507 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1508 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1509 buildStructValue(Fields, CGM, RD, RL, Data); 1510 Fields.finishAndAddTo(Parent); 1511 } 1512 1513 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1514 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1515 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1516 FlagsTy FlagsKey(Flags, Reserved2Flags); 1517 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1518 if (!Entry) { 1519 if (!DefaultOpenMPPSource) { 1520 // Initialize default location for psource field of ident_t structure of 1521 // all ident_t objects. Format is ";file;function;line;column;;". 
1522 // Taken from 1523 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1524 DefaultOpenMPPSource = 1525 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1526 DefaultOpenMPPSource = 1527 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1528 } 1529 1530 llvm::Constant *Data[] = { 1531 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1532 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1533 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1534 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1535 llvm::GlobalValue *DefaultOpenMPLocation = 1536 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1537 llvm::GlobalValue::PrivateLinkage); 1538 DefaultOpenMPLocation->setUnnamedAddr( 1539 llvm::GlobalValue::UnnamedAddr::Global); 1540 1541 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1542 } 1543 return Address(Entry, Align); 1544 } 1545 1546 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1547 bool AtCurrentPoint) { 1548 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1549 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1550 1551 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1552 if (AtCurrentPoint) { 1553 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1554 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1555 } else { 1556 Elem.second.ServiceInsertPt = 1557 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1558 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1559 } 1560 } 1561 1562 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1563 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1564 if (Elem.second.ServiceInsertPt) { 1565 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1566 Elem.second.ServiceInsertPt = nullptr; 1567 Ptr->eraseFromParent(); 1568 } 1569 } 1570 1571 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                     SourceLocation Loc,
                                     unsigned Flags) {
  // Returns a pointer to an ident_t describing Loc. Without debug info (or
  // with an invalid Loc) this is the shared default location global;
  // otherwise it is a per-function temporary whose psource field is
  // rewritten to the ";file;function;line;column;;" string for Loc.
  Flags |= OMP_IDENT_KMPC;
  // If no debug info is generated - return global default location.
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid())
    return getOrCreateDefaultLocation(Flags).getPointer();

  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
  Address LocValue = Address::invalid();
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end())
    LocValue = Address(I->second.DebugLoc, Align);

  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
  // getThreadID was called before this routine.
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the temporary from the default location at the service
    // insertion point; the guard restores the builder position afterwards.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // The location strings are cached per raw source location encoding.
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc =
        CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    // The function part stays empty when the current declaration is not a
    // FunctionDecl.
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

/// Return the global thread id value to use in the current function.
/// A value cached for the function is reused. Otherwise, where the checks
/// below allow it, the id is loaded from the region's thread id variable;
/// failing that, a call to the __kmpc_global_thread_num runtime entry is
/// emitted at the service insertion point and cached.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
      !CGF.getLangOpts().CXXExceptions ||
      CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
    if (auto *OMPRegionInfo =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
      if (OMPRegionInfo->getThreadIDVariable()) {
        // Check if this an outlined function with thread id passed as argument.
        LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
1657 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1658 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1659 Elem.second.ThreadID = ThreadID; 1660 } 1661 return ThreadID; 1662 } 1663 } 1664 } 1665 1666 // This is not an outlined function region - need to call __kmpc_int32 1667 // kmpc_global_thread_num(ident_t *loc). 1668 // Generate thread id value and cache this value for use across the 1669 // function. 1670 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1671 if (!Elem.second.ServiceInsertPt) 1672 setLocThreadIdInsertPt(CGF); 1673 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1674 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1675 llvm::CallInst *Call = CGF.Builder.CreateCall( 1676 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1677 emitUpdateLocation(CGF, Loc)); 1678 Call->setCallingConv(CGF.getRuntimeCC()); 1679 Elem.second.ThreadID = Call; 1680 return Call; 1681 } 1682 1683 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1684 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1685 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1686 clearLocThreadIdInsertPt(CGF); 1687 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1688 } 1689 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1690 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1691 UDRMap.erase(D); 1692 FunctionUDRMap.erase(CGF.CurFn); 1693 } 1694 auto I = FunctionUDMMap.find(CGF.CurFn); 1695 if (I != FunctionUDMMap.end()) { 1696 for(auto *D : I->second) 1697 UDMMap.erase(D); 1698 FunctionUDMMap.erase(I); 1699 } 1700 } 1701 1702 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1703 return IdentTy->getPointerTo(); 1704 } 1705 1706 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1707 if (!Kmpc_MicroTy) { 1708 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1709 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1710 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1711 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1712 } 1713 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1714 } 1715 1716 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1717 llvm::FunctionCallee RTLFn = nullptr; 1718 switch (static_cast<OpenMPRTLFunction>(Function)) { 1719 case OMPRTL__kmpc_fork_call: { 1720 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1721 // microtask, ...); 1722 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1723 getKmpc_MicroPointerTy()}; 1724 auto *FnTy = 1725 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1726 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1727 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1728 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1729 llvm::LLVMContext &Ctx = F->getContext(); 1730 llvm::MDBuilder MDB(Ctx); 1731 // Annotate the callback behavior of the __kmpc_fork_call: 1732 // - The callback callee is argument number 2 (microtask). 1733 // - The first two arguments of the callback callee are unknown (-1). 1734 // - All variadic arguments to the __kmpc_fork_call are passed to the 1735 // callback callee. 
1736 F->addMetadata( 1737 llvm::LLVMContext::MD_callback, 1738 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1739 2, {-1, -1}, 1740 /* VarArgsArePassed */ true)})); 1741 } 1742 } 1743 break; 1744 } 1745 case OMPRTL__kmpc_global_thread_num: { 1746 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1747 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1748 auto *FnTy = 1749 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1750 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1751 break; 1752 } 1753 case OMPRTL__kmpc_threadprivate_cached: { 1754 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1755 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1756 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1757 CGM.VoidPtrTy, CGM.SizeTy, 1758 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1759 auto *FnTy = 1760 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1761 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1762 break; 1763 } 1764 case OMPRTL__kmpc_critical: { 1765 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1766 // kmp_critical_name *crit); 1767 llvm::Type *TypeParams[] = { 1768 getIdentTyPointerTy(), CGM.Int32Ty, 1769 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1770 auto *FnTy = 1771 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1772 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1773 break; 1774 } 1775 case OMPRTL__kmpc_critical_with_hint: { 1776 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1777 // kmp_critical_name *crit, uintptr_t hint); 1778 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1779 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1780 CGM.IntPtrTy}; 1781 auto *FnTy = 1782 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1783 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1784 break; 1785 } 1786 case OMPRTL__kmpc_threadprivate_register: { 1787 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1788 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1789 // typedef void *(*kmpc_ctor)(void *); 1790 auto *KmpcCtorTy = 1791 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1792 /*isVarArg*/ false)->getPointerTo(); 1793 // typedef void *(*kmpc_cctor)(void *, void *); 1794 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1795 auto *KmpcCopyCtorTy = 1796 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1797 /*isVarArg*/ false) 1798 ->getPointerTo(); 1799 // typedef void (*kmpc_dtor)(void *); 1800 auto *KmpcDtorTy = 1801 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1802 ->getPointerTo(); 1803 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1804 KmpcCopyCtorTy, KmpcDtorTy}; 1805 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1806 /*isVarArg*/ false); 1807 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1808 break; 1809 } 1810 case OMPRTL__kmpc_end_critical: { 1811 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1812 // kmp_critical_name *crit); 1813 llvm::Type *TypeParams[] = { 1814 getIdentTyPointerTy(), CGM.Int32Ty, 1815 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1816 auto *FnTy = 1817 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1818 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1819 break; 1820 } 1821 case OMPRTL__kmpc_cancel_barrier: { 1822 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1823 // global_tid); 1824 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1825 auto *FnTy = 1826 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1827 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1828 break; 1829 } 1830 case 
OMPRTL__kmpc_barrier: { 1831 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1832 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1833 auto *FnTy = 1834 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1835 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1836 break; 1837 } 1838 case OMPRTL__kmpc_for_static_fini: { 1839 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1840 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1841 auto *FnTy = 1842 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1843 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1844 break; 1845 } 1846 case OMPRTL__kmpc_push_num_threads: { 1847 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1848 // kmp_int32 num_threads) 1849 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1850 CGM.Int32Ty}; 1851 auto *FnTy = 1852 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1853 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1854 break; 1855 } 1856 case OMPRTL__kmpc_serialized_parallel: { 1857 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1858 // global_tid); 1859 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1860 auto *FnTy = 1861 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1862 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1863 break; 1864 } 1865 case OMPRTL__kmpc_end_serialized_parallel: { 1866 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1867 // global_tid); 1868 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1869 auto *FnTy = 1870 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1871 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1872 break; 1873 } 1874 case OMPRTL__kmpc_flush: { 1875 // Build void 
__kmpc_flush(ident_t *loc); 1876 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1877 auto *FnTy = 1878 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1879 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1880 break; 1881 } 1882 case OMPRTL__kmpc_master: { 1883 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1884 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1885 auto *FnTy = 1886 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1887 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1888 break; 1889 } 1890 case OMPRTL__kmpc_end_master: { 1891 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1892 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1893 auto *FnTy = 1894 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1895 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1896 break; 1897 } 1898 case OMPRTL__kmpc_omp_taskyield: { 1899 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1900 // int end_part); 1901 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1902 auto *FnTy = 1903 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1904 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1905 break; 1906 } 1907 case OMPRTL__kmpc_single: { 1908 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1909 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1910 auto *FnTy = 1911 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1912 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_end_single: { 1916 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1917 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1918 auto *FnTy = 1919 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1920 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1921 break; 1922 } 1923 case OMPRTL__kmpc_omp_task_alloc: { 1924 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1925 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1926 // kmp_routine_entry_t *task_entry); 1927 assert(KmpRoutineEntryPtrTy != nullptr && 1928 "Type kmp_routine_entry_t must be created."); 1929 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1930 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1931 // Return void * and then cast to particular kmp_task_t type. 1932 auto *FnTy = 1933 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1934 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1935 break; 1936 } 1937 case OMPRTL__kmpc_omp_target_task_alloc: { 1938 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 1939 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1940 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 1941 assert(KmpRoutineEntryPtrTy != nullptr && 1942 "Type kmp_routine_entry_t must be created."); 1943 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1944 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 1945 CGM.Int64Ty}; 1946 // Return void * and then cast to particular kmp_task_t type. 
1947 auto *FnTy = 1948 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1949 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 1950 break; 1951 } 1952 case OMPRTL__kmpc_omp_task: { 1953 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1954 // *new_task); 1955 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1956 CGM.VoidPtrTy}; 1957 auto *FnTy = 1958 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1959 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1960 break; 1961 } 1962 case OMPRTL__kmpc_copyprivate: { 1963 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1964 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1965 // kmp_int32 didit); 1966 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1967 auto *CpyFnTy = 1968 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1969 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1970 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1971 CGM.Int32Ty}; 1972 auto *FnTy = 1973 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1974 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1975 break; 1976 } 1977 case OMPRTL__kmpc_reduce: { 1978 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1979 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1980 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1981 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1982 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1983 /*isVarArg=*/false); 1984 llvm::Type *TypeParams[] = { 1985 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1986 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1987 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1988 auto *FnTy = 1989 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1990 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1991 break; 1992 } 1993 case OMPRTL__kmpc_reduce_nowait: { 1994 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1995 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1996 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1997 // *lck); 1998 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1999 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2000 /*isVarArg=*/false); 2001 llvm::Type *TypeParams[] = { 2002 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2003 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2004 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2005 auto *FnTy = 2006 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2007 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2008 break; 2009 } 2010 case OMPRTL__kmpc_end_reduce: { 2011 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2012 // kmp_critical_name *lck); 2013 llvm::Type *TypeParams[] = { 2014 getIdentTyPointerTy(), CGM.Int32Ty, 2015 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2016 auto *FnTy = 2017 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2018 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2019 break; 2020 } 2021 case OMPRTL__kmpc_end_reduce_nowait: { 2022 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2023 // kmp_critical_name *lck); 2024 llvm::Type *TypeParams[] = { 2025 getIdentTyPointerTy(), CGM.Int32Ty, 2026 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2027 auto *FnTy = 2028 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2029 RTLFn = 2030 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2031 break; 2032 } 2033 case OMPRTL__kmpc_omp_task_begin_if0: { 
2034 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2035 // *new_task); 2036 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2037 CGM.VoidPtrTy}; 2038 auto *FnTy = 2039 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2040 RTLFn = 2041 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2042 break; 2043 } 2044 case OMPRTL__kmpc_omp_task_complete_if0: { 2045 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2046 // *new_task); 2047 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2048 CGM.VoidPtrTy}; 2049 auto *FnTy = 2050 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2051 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2052 /*Name=*/"__kmpc_omp_task_complete_if0"); 2053 break; 2054 } 2055 case OMPRTL__kmpc_ordered: { 2056 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2057 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2058 auto *FnTy = 2059 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2060 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2061 break; 2062 } 2063 case OMPRTL__kmpc_end_ordered: { 2064 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2065 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2066 auto *FnTy = 2067 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2068 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2069 break; 2070 } 2071 case OMPRTL__kmpc_omp_taskwait: { 2072 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2073 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2074 auto *FnTy = 2075 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2076 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2077 break; 2078 } 2079 case OMPRTL__kmpc_taskgroup: { 2080 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2081 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2082 auto *FnTy = 2083 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2084 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2085 break; 2086 } 2087 case OMPRTL__kmpc_end_taskgroup: { 2088 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2089 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2090 auto *FnTy = 2091 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2092 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2093 break; 2094 } 2095 case OMPRTL__kmpc_push_proc_bind: { 2096 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2097 // int proc_bind) 2098 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2099 auto *FnTy = 2100 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2101 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2102 break; 2103 } 2104 case OMPRTL__kmpc_omp_task_with_deps: { 2105 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2106 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2107 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2108 llvm::Type *TypeParams[] = { 2109 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2110 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2111 auto *FnTy = 2112 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2113 RTLFn = 2114 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2115 break; 2116 } 2117 case OMPRTL__kmpc_omp_wait_deps: { 2118 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2119 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2120 // kmp_depend_info_t *noalias_dep_list); 2121 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2122 CGM.Int32Ty, CGM.VoidPtrTy, 2123 
CGM.Int32Ty, CGM.VoidPtrTy}; 2124 auto *FnTy = 2125 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2126 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2127 break; 2128 } 2129 case OMPRTL__kmpc_cancellationpoint: { 2130 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2131 // global_tid, kmp_int32 cncl_kind) 2132 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2133 auto *FnTy = 2134 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2135 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2136 break; 2137 } 2138 case OMPRTL__kmpc_cancel: { 2139 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2140 // kmp_int32 cncl_kind) 2141 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2142 auto *FnTy = 2143 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2144 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2145 break; 2146 } 2147 case OMPRTL__kmpc_push_num_teams: { 2148 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2149 // kmp_int32 num_teams, kmp_int32 num_threads) 2150 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2151 CGM.Int32Ty}; 2152 auto *FnTy = 2153 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2154 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2155 break; 2156 } 2157 case OMPRTL__kmpc_fork_teams: { 2158 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2159 // microtask, ...); 2160 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2161 getKmpc_MicroPointerTy()}; 2162 auto *FnTy = 2163 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2164 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2165 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2166 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2167 llvm::LLVMContext &Ctx = F->getContext(); 2168 llvm::MDBuilder MDB(Ctx); 2169 // Annotate the callback behavior of the __kmpc_fork_teams: 2170 // - The callback callee is argument number 2 (microtask). 2171 // - The first two arguments of the callback callee are unknown (-1). 2172 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2173 // callback callee. 2174 F->addMetadata( 2175 llvm::LLVMContext::MD_callback, 2176 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2177 2, {-1, -1}, 2178 /* VarArgsArePassed */ true)})); 2179 } 2180 } 2181 break; 2182 } 2183 case OMPRTL__kmpc_taskloop: { 2184 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2185 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2186 // sched, kmp_uint64 grainsize, void *task_dup); 2187 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2188 CGM.IntTy, 2189 CGM.VoidPtrTy, 2190 CGM.IntTy, 2191 CGM.Int64Ty->getPointerTo(), 2192 CGM.Int64Ty->getPointerTo(), 2193 CGM.Int64Ty, 2194 CGM.IntTy, 2195 CGM.IntTy, 2196 CGM.Int64Ty, 2197 CGM.VoidPtrTy}; 2198 auto *FnTy = 2199 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2200 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2201 break; 2202 } 2203 case OMPRTL__kmpc_doacross_init: { 2204 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2205 // num_dims, struct kmp_dim *dims); 2206 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2207 CGM.Int32Ty, 2208 CGM.Int32Ty, 2209 CGM.VoidPtrTy}; 2210 auto *FnTy = 2211 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2212 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2213 break; 2214 } 2215 case OMPRTL__kmpc_doacross_fini: { 2216 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2217 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2218 auto *FnTy 
= 2219 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2220 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2221 break; 2222 } 2223 case OMPRTL__kmpc_doacross_post: { 2224 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2225 // *vec); 2226 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2227 CGM.Int64Ty->getPointerTo()}; 2228 auto *FnTy = 2229 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2230 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2231 break; 2232 } 2233 case OMPRTL__kmpc_doacross_wait: { 2234 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2235 // *vec); 2236 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2237 CGM.Int64Ty->getPointerTo()}; 2238 auto *FnTy = 2239 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2240 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2241 break; 2242 } 2243 case OMPRTL__kmpc_task_reduction_init: { 2244 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2245 // *data); 2246 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2247 auto *FnTy = 2248 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2249 RTLFn = 2250 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2251 break; 2252 } 2253 case OMPRTL__kmpc_task_reduction_get_th_data: { 2254 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2255 // *d); 2256 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2257 auto *FnTy = 2258 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2259 RTLFn = CGM.CreateRuntimeFunction( 2260 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2261 break; 2262 } 2263 case OMPRTL__kmpc_alloc: { 2264 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2265 // 
al); omp_allocator_handle_t type is void *. 2266 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2267 auto *FnTy = 2268 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2269 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2270 break; 2271 } 2272 case OMPRTL__kmpc_free: { 2273 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2274 // al); omp_allocator_handle_t type is void *. 2275 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2276 auto *FnTy = 2277 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2278 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2279 break; 2280 } 2281 case OMPRTL__kmpc_push_target_tripcount: { 2282 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2283 // size); 2284 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2285 llvm::FunctionType *FnTy = 2286 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2287 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2288 break; 2289 } 2290 case OMPRTL__tgt_target: { 2291 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2292 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2293 // *arg_types); 2294 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2295 CGM.VoidPtrTy, 2296 CGM.Int32Ty, 2297 CGM.VoidPtrPtrTy, 2298 CGM.VoidPtrPtrTy, 2299 CGM.Int64Ty->getPointerTo(), 2300 CGM.Int64Ty->getPointerTo()}; 2301 auto *FnTy = 2302 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2303 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2304 break; 2305 } 2306 case OMPRTL__tgt_target_nowait: { 2307 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2308 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2309 // int64_t *arg_types); 2310 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2311 CGM.VoidPtrTy, 2312 
CGM.Int32Ty, 2313 CGM.VoidPtrPtrTy, 2314 CGM.VoidPtrPtrTy, 2315 CGM.Int64Ty->getPointerTo(), 2316 CGM.Int64Ty->getPointerTo()}; 2317 auto *FnTy = 2318 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2319 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2320 break; 2321 } 2322 case OMPRTL__tgt_target_teams: { 2323 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2324 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2325 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2326 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2327 CGM.VoidPtrTy, 2328 CGM.Int32Ty, 2329 CGM.VoidPtrPtrTy, 2330 CGM.VoidPtrPtrTy, 2331 CGM.Int64Ty->getPointerTo(), 2332 CGM.Int64Ty->getPointerTo(), 2333 CGM.Int32Ty, 2334 CGM.Int32Ty}; 2335 auto *FnTy = 2336 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2337 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2338 break; 2339 } 2340 case OMPRTL__tgt_target_teams_nowait: { 2341 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2342 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2343 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2344 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2345 CGM.VoidPtrTy, 2346 CGM.Int32Ty, 2347 CGM.VoidPtrPtrTy, 2348 CGM.VoidPtrPtrTy, 2349 CGM.Int64Ty->getPointerTo(), 2350 CGM.Int64Ty->getPointerTo(), 2351 CGM.Int32Ty, 2352 CGM.Int32Ty}; 2353 auto *FnTy = 2354 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2355 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2356 break; 2357 } 2358 case OMPRTL__tgt_register_requires: { 2359 // Build void __tgt_register_requires(int64_t flags); 2360 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2361 auto *FnTy = 2362 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2363 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2364 
break; 2365 } 2366 case OMPRTL__tgt_register_lib: { 2367 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2368 QualType ParamTy = 2369 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2370 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2371 auto *FnTy = 2372 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2373 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2374 break; 2375 } 2376 case OMPRTL__tgt_unregister_lib: { 2377 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2378 QualType ParamTy = 2379 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2380 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2381 auto *FnTy = 2382 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2383 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2384 break; 2385 } 2386 case OMPRTL__tgt_target_data_begin: { 2387 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2388 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2389 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2390 CGM.Int32Ty, 2391 CGM.VoidPtrPtrTy, 2392 CGM.VoidPtrPtrTy, 2393 CGM.Int64Ty->getPointerTo(), 2394 CGM.Int64Ty->getPointerTo()}; 2395 auto *FnTy = 2396 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2397 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2398 break; 2399 } 2400 case OMPRTL__tgt_target_data_begin_nowait: { 2401 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2402 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2403 // *arg_types); 2404 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2405 CGM.Int32Ty, 2406 CGM.VoidPtrPtrTy, 2407 CGM.VoidPtrPtrTy, 2408 CGM.Int64Ty->getPointerTo(), 2409 CGM.Int64Ty->getPointerTo()}; 2410 auto *FnTy = 2411 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2412 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2413 break; 2414 } 2415 case OMPRTL__tgt_target_data_end: { 2416 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2417 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2418 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2419 CGM.Int32Ty, 2420 CGM.VoidPtrPtrTy, 2421 CGM.VoidPtrPtrTy, 2422 CGM.Int64Ty->getPointerTo(), 2423 CGM.Int64Ty->getPointerTo()}; 2424 auto *FnTy = 2425 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2426 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2427 break; 2428 } 2429 case OMPRTL__tgt_target_data_end_nowait: { 2430 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2431 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2432 // *arg_types); 2433 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2434 CGM.Int32Ty, 2435 CGM.VoidPtrPtrTy, 2436 CGM.VoidPtrPtrTy, 2437 CGM.Int64Ty->getPointerTo(), 2438 CGM.Int64Ty->getPointerTo()}; 2439 auto *FnTy = 2440 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2441 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2442 break; 2443 } 2444 case OMPRTL__tgt_target_data_update: { 2445 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2446 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2447 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2448 CGM.Int32Ty, 2449 CGM.VoidPtrPtrTy, 2450 CGM.VoidPtrPtrTy, 2451 CGM.Int64Ty->getPointerTo(), 2452 CGM.Int64Ty->getPointerTo()}; 2453 auto *FnTy = 2454 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2455 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2456 break; 2457 } 2458 case OMPRTL__tgt_target_data_update_nowait: { 2459 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2460 // arg_num, void** args_base, void **args, int64_t *arg_sizes, 
int64_t 2461 // *arg_types); 2462 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2463 CGM.Int32Ty, 2464 CGM.VoidPtrPtrTy, 2465 CGM.VoidPtrPtrTy, 2466 CGM.Int64Ty->getPointerTo(), 2467 CGM.Int64Ty->getPointerTo()}; 2468 auto *FnTy = 2469 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2470 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2471 break; 2472 } 2473 case OMPRTL__tgt_mapper_num_components: { 2474 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2475 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2476 auto *FnTy = 2477 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2478 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2479 break; 2480 } 2481 case OMPRTL__tgt_push_mapper_component: { 2482 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2483 // *base, void *begin, int64_t size, int64_t type); 2484 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2485 CGM.Int64Ty, CGM.Int64Ty}; 2486 auto *FnTy = 2487 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2488 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2489 break; 2490 } 2491 } 2492 assert(RTLFn && "Unable to find OpenMP runtime function"); 2493 return RTLFn; 2494 } 2495 2496 llvm::FunctionCallee 2497 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2498 assert((IVSize == 32 || IVSize == 64) && 2499 "IV size is not compatible with the omp runtime"); 2500 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2501 : "__kmpc_for_static_init_4u") 2502 : (IVSigned ? "__kmpc_for_static_init_8" 2503 : "__kmpc_for_static_init_8u"); 2504 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2505 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2506 llvm::Type *TypeParams[] = { 2507 getIdentTyPointerTy(), // loc 2508 CGM.Int32Ty, // tid 2509 CGM.Int32Ty, // schedtype 2510 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2511 PtrTy, // p_lower 2512 PtrTy, // p_upper 2513 PtrTy, // p_stride 2514 ITy, // incr 2515 ITy // chunk 2516 }; 2517 auto *FnTy = 2518 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2519 return CGM.CreateRuntimeFunction(FnTy, Name); 2520 } 2521 2522 llvm::FunctionCallee 2523 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2524 assert((IVSize == 32 || IVSize == 64) && 2525 "IV size is not compatible with the omp runtime"); 2526 StringRef Name = 2527 IVSize == 32 2528 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2529 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2530 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2531 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2532 CGM.Int32Ty, // tid 2533 CGM.Int32Ty, // schedtype 2534 ITy, // lower 2535 ITy, // upper 2536 ITy, // stride 2537 ITy // chunk 2538 }; 2539 auto *FnTy = 2540 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2541 return CGM.CreateRuntimeFunction(FnTy, Name); 2542 } 2543 2544 llvm::FunctionCallee 2545 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2546 assert((IVSize == 32 || IVSize == 64) && 2547 "IV size is not compatible with the omp runtime"); 2548 StringRef Name = 2549 IVSize == 32 2550 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2551 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2552 llvm::Type *TypeParams[] = { 2553 getIdentTyPointerTy(), // loc 2554 CGM.Int32Ty, // tid 2555 }; 2556 auto *FnTy = 2557 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2558 return CGM.CreateRuntimeFunction(FnTy, Name); 2559 } 2560 2561 llvm::FunctionCallee 2562 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2563 assert((IVSize == 32 || IVSize == 64) && 2564 "IV size is not compatible with the omp runtime"); 2565 StringRef Name = 2566 IVSize == 32 2567 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2568 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2569 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2570 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2571 llvm::Type *TypeParams[] = { 2572 getIdentTyPointerTy(), // loc 2573 CGM.Int32Ty, // tid 2574 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2575 PtrTy, // p_lower 2576 PtrTy, // p_upper 2577 PtrTy // p_stride 2578 }; 2579 auto *FnTy = 2580 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2581 return CGM.CreateRuntimeFunction(FnTy, Name); 2582 } 2583 2584 /// Obtain information that uniquely identifies a target entry. This 2585 /// consists of the file and device IDs as well as line number associated with 2586 /// the relevant entry source location. 
2587 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2588 unsigned &DeviceID, unsigned &FileID, 2589 unsigned &LineNum) { 2590 SourceManager &SM = C.getSourceManager(); 2591 2592 // The loc should be always valid and have a file ID (the user cannot use 2593 // #pragma directives in macros) 2594 2595 assert(Loc.isValid() && "Source location is expected to be always valid."); 2596 2597 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2598 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2599 2600 llvm::sys::fs::UniqueID ID; 2601 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2602 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2603 << PLoc.getFilename() << EC.message(); 2604 2605 DeviceID = ID.getDevice(); 2606 FileID = ID.getFile(); 2607 LineNum = PLoc.getLine(); 2608 } 2609 2610 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2611 if (CGM.getLangOpts().OpenMPSimd) 2612 return Address::invalid(); 2613 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2614 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2615 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2616 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2617 HasRequiresUnifiedSharedMemory))) { 2618 SmallString<64> PtrName; 2619 { 2620 llvm::raw_svector_ostream OS(PtrName); 2621 OS << CGM.getMangledName(GlobalDecl(VD)); 2622 if (!VD->isExternallyVisible()) { 2623 unsigned DeviceID, FileID, Line; 2624 getTargetEntryUniqueInfo(CGM.getContext(), 2625 VD->getCanonicalDecl()->getBeginLoc(), 2626 DeviceID, FileID, Line); 2627 OS << llvm::format("_%x", FileID); 2628 } 2629 OS << "_decl_tgt_ref_ptr"; 2630 } 2631 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2632 if (!Ptr) { 2633 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2634 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2635 PtrName); 2636 2637 auto *GV = cast<llvm::GlobalVariable>(Ptr); 
2638 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2639 2640 if (!CGM.getLangOpts().OpenMPIsDevice) 2641 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2642 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2643 } 2644 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2645 } 2646 return Address::invalid(); 2647 } 2648 2649 llvm::Constant * 2650 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2651 assert(!CGM.getLangOpts().OpenMPUseTLS || 2652 !CGM.getContext().getTargetInfo().isTLSSupported()); 2653 // Lookup the entry, lazily creating it if necessary. 2654 std::string Suffix = getName({"cache", ""}); 2655 return getOrCreateInternalVariable( 2656 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2657 } 2658 2659 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2660 const VarDecl *VD, 2661 Address VDAddr, 2662 SourceLocation Loc) { 2663 if (CGM.getLangOpts().OpenMPUseTLS && 2664 CGM.getContext().getTargetInfo().isTLSSupported()) 2665 return VDAddr; 2666 2667 llvm::Type *VarTy = VDAddr.getElementType(); 2668 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2669 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2670 CGM.Int8PtrTy), 2671 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2672 getOrCreateThreadPrivateCache(VD)}; 2673 return Address(CGF.EmitRuntimeCall( 2674 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2675 VDAddr.getAlignment()); 2676 } 2677 2678 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2679 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2680 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2681 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2682 // library. 
2683 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2684 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2685 OMPLoc); 2686 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2687 // to register constructor/destructor for variable. 2688 llvm::Value *Args[] = { 2689 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2690 Ctor, CopyCtor, Dtor}; 2691 CGF.EmitRuntimeCall( 2692 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2693 } 2694 2695 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2696 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2697 bool PerformInit, CodeGenFunction *CGF) { 2698 if (CGM.getLangOpts().OpenMPUseTLS && 2699 CGM.getContext().getTargetInfo().isTLSSupported()) 2700 return nullptr; 2701 2702 VD = VD->getDefinition(CGM.getContext()); 2703 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2704 QualType ASTTy = VD->getType(); 2705 2706 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2707 const Expr *Init = VD->getAnyInitializer(); 2708 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2709 // Generate function that re-emits the declaration's initializer into the 2710 // threadprivate copy of the variable VD 2711 CodeGenFunction CtorCGF(CGM); 2712 FunctionArgList Args; 2713 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2714 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2715 ImplicitParamDecl::Other); 2716 Args.push_back(&Dst); 2717 2718 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2719 CGM.getContext().VoidPtrTy, Args); 2720 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2721 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2722 llvm::Function *Fn = 2723 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2724 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2725 Args, Loc, Loc); 2726 
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2727 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2728 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2729 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2730 Arg = CtorCGF.Builder.CreateElementBitCast( 2731 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2732 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2733 /*IsInitializer=*/true); 2734 ArgVal = CtorCGF.EmitLoadOfScalar( 2735 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2736 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2737 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2738 CtorCGF.FinishFunction(); 2739 Ctor = Fn; 2740 } 2741 if (VD->getType().isDestructedType() != QualType::DK_none) { 2742 // Generate function that emits destructor call for the threadprivate copy 2743 // of the variable VD 2744 CodeGenFunction DtorCGF(CGM); 2745 FunctionArgList Args; 2746 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2747 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2748 ImplicitParamDecl::Other); 2749 Args.push_back(&Dst); 2750 2751 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2752 CGM.getContext().VoidTy, Args); 2753 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2754 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2755 llvm::Function *Fn = 2756 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2757 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2758 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2759 Loc, Loc); 2760 // Create a scope with an artificial location for the body of this function. 
2761 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2762 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2763 DtorCGF.GetAddrOfLocalVar(&Dst), 2764 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2765 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2766 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2767 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2768 DtorCGF.FinishFunction(); 2769 Dtor = Fn; 2770 } 2771 // Do not emit init function if it is not required. 2772 if (!Ctor && !Dtor) 2773 return nullptr; 2774 2775 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2776 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2777 /*isVarArg=*/false) 2778 ->getPointerTo(); 2779 // Copying constructor for the threadprivate variable. 2780 // Must be NULL - reserved by runtime, but currently it requires that this 2781 // parameter is always NULL. Otherwise it fires assertion. 2782 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2783 if (Ctor == nullptr) { 2784 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2785 /*isVarArg=*/false) 2786 ->getPointerTo(); 2787 Ctor = llvm::Constant::getNullValue(CtorTy); 2788 } 2789 if (Dtor == nullptr) { 2790 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2791 /*isVarArg=*/false) 2792 ->getPointerTo(); 2793 Dtor = llvm::Constant::getNullValue(DtorTy); 2794 } 2795 if (!CGF) { 2796 auto *InitFunctionTy = 2797 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2798 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2799 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2800 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2801 CodeGenFunction InitCGF(CGM); 2802 FunctionArgList ArgList; 2803 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2804 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2805 Loc, Loc); 2806 
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2807 InitCGF.FinishFunction(); 2808 return InitFunction; 2809 } 2810 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2811 } 2812 return nullptr; 2813 } 2814 2815 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2816 llvm::GlobalVariable *Addr, 2817 bool PerformInit) { 2818 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2819 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2820 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2821 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2822 HasRequiresUnifiedSharedMemory)) 2823 return CGM.getLangOpts().OpenMPIsDevice; 2824 VD = VD->getDefinition(CGM.getContext()); 2825 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2826 return CGM.getLangOpts().OpenMPIsDevice; 2827 2828 QualType ASTTy = VD->getType(); 2829 2830 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2831 // Produce the unique prefix to identify the new target regions. We use 2832 // the source location of the variable declaration which we know to not 2833 // conflict with any target region. 
2834 unsigned DeviceID; 2835 unsigned FileID; 2836 unsigned Line; 2837 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2838 SmallString<128> Buffer, Out; 2839 { 2840 llvm::raw_svector_ostream OS(Buffer); 2841 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2842 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2843 } 2844 2845 const Expr *Init = VD->getAnyInitializer(); 2846 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2847 llvm::Constant *Ctor; 2848 llvm::Constant *ID; 2849 if (CGM.getLangOpts().OpenMPIsDevice) { 2850 // Generate function that re-emits the declaration's initializer into 2851 // the threadprivate copy of the variable VD 2852 CodeGenFunction CtorCGF(CGM); 2853 2854 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2855 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2856 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2857 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2858 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2859 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2860 FunctionArgList(), Loc, Loc); 2861 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2862 CtorCGF.EmitAnyExprToMem(Init, 2863 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2864 Init->getType().getQualifiers(), 2865 /*IsInitializer=*/true); 2866 CtorCGF.FinishFunction(); 2867 Ctor = Fn; 2868 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2869 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2870 } else { 2871 Ctor = new llvm::GlobalVariable( 2872 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2873 llvm::GlobalValue::PrivateLinkage, 2874 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2875 ID = Ctor; 2876 } 2877 2878 // Register the information for the entry associated with the constructor. 
2879 Out.clear(); 2880 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2881 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2882 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2883 } 2884 if (VD->getType().isDestructedType() != QualType::DK_none) { 2885 llvm::Constant *Dtor; 2886 llvm::Constant *ID; 2887 if (CGM.getLangOpts().OpenMPIsDevice) { 2888 // Generate function that emits destructor call for the threadprivate 2889 // copy of the variable VD 2890 CodeGenFunction DtorCGF(CGM); 2891 2892 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2893 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2894 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2895 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2896 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2897 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2898 FunctionArgList(), Loc, Loc); 2899 // Create a scope with an artificial location for the body of this 2900 // function. 2901 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2902 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2903 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2904 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2905 DtorCGF.FinishFunction(); 2906 Dtor = Fn; 2907 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2908 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2909 } else { 2910 Dtor = new llvm::GlobalVariable( 2911 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2912 llvm::GlobalValue::PrivateLinkage, 2913 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2914 ID = Dtor; 2915 } 2916 // Register the information for the entry associated with the destructor. 
2917 Out.clear(); 2918 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2919 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2920 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2921 } 2922 return CGM.getLangOpts().OpenMPIsDevice; 2923 } 2924 2925 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2926 QualType VarType, 2927 StringRef Name) { 2928 std::string Suffix = getName({"artificial", ""}); 2929 std::string CacheSuffix = getName({"cache", ""}); 2930 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2931 llvm::Value *GAddr = 2932 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2933 llvm::Value *Args[] = { 2934 emitUpdateLocation(CGF, SourceLocation()), 2935 getThreadID(CGF, SourceLocation()), 2936 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2937 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2938 /*isSigned=*/false), 2939 getOrCreateInternalVariable( 2940 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2941 return Address( 2942 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2943 CGF.EmitRuntimeCall( 2944 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2945 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2946 CGM.getPointerAlign()); 2947 } 2948 2949 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2950 const RegionCodeGenTy &ThenGen, 2951 const RegionCodeGenTy &ElseGen) { 2952 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2953 2954 // If the condition constant folds and can be elided, try to avoid emitting 2955 // the condition and the dead arm of the if/else. 2956 bool CondConstant; 2957 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2958 if (CondConstant) 2959 ThenGen(CGF); 2960 else 2961 ElseGen(CGF); 2962 return; 2963 } 2964 2965 // Otherwise, the condition did not fold, or we couldn't elide it. 
Just 2966 // emit the conditional branch. 2967 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2968 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2969 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2970 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2971 2972 // Emit the 'then' code. 2973 CGF.EmitBlock(ThenBlock); 2974 ThenGen(CGF); 2975 CGF.EmitBranch(ContBlock); 2976 // Emit the 'else' code if present. 2977 // There is no need to emit line number for unconditional branch. 2978 (void)ApplyDebugLocation::CreateEmpty(CGF); 2979 CGF.EmitBlock(ElseBlock); 2980 ElseGen(CGF); 2981 // There is no need to emit line number for unconditional branch. 2982 (void)ApplyDebugLocation::CreateEmpty(CGF); 2983 CGF.EmitBranch(ContBlock); 2984 // Emit the continuation block for code after the if. 2985 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2986 } 2987 2988 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2989 llvm::Function *OutlinedFn, 2990 ArrayRef<llvm::Value *> CapturedVars, 2991 const Expr *IfCond) { 2992 if (!CGF.HaveInsertPoint()) 2993 return; 2994 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2995 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 2996 PrePostActionTy &) { 2997 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2998 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2999 llvm::Value *Args[] = { 3000 RTLoc, 3001 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3002 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3003 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3004 RealArgs.append(std::begin(Args), std::end(Args)); 3005 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3006 3007 llvm::FunctionCallee RTLFn = 3008 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3009 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3010 }; 3011 auto &&ElseGen = [OutlinedFn, 
CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3012 PrePostActionTy &) { 3013 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3014 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3015 // Build calls: 3016 // __kmpc_serialized_parallel(&Loc, GTid); 3017 llvm::Value *Args[] = {RTLoc, ThreadID}; 3018 CGF.EmitRuntimeCall( 3019 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3020 3021 // OutlinedFn(>id, &zero, CapturedStruct); 3022 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3023 /*Name*/ ".zero.addr"); 3024 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 3025 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3026 // ThreadId for serialized parallels is 0. 3027 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 3028 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 3029 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3030 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3031 3032 // __kmpc_end_serialized_parallel(&Loc, GTid); 3033 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3034 CGF.EmitRuntimeCall( 3035 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3036 EndArgs); 3037 }; 3038 if (IfCond) { 3039 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 3040 } else { 3041 RegionCodeGenTy ThenRCG(ThenGen); 3042 ThenRCG(CGF); 3043 } 3044 } 3045 3046 // If we're inside an (outlined) parallel region, use the region info's 3047 // thread-ID variable (it is passed in a first argument of the outlined function 3048 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3049 // regular serial code region, get thread ID by calling kmp_int32 3050 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3051 // return the address of that temp. 
3052 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3053 SourceLocation Loc) { 3054 if (auto *OMPRegionInfo = 3055 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3056 if (OMPRegionInfo->getThreadIDVariable()) 3057 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 3058 3059 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3060 QualType Int32Ty = 3061 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3062 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3063 CGF.EmitStoreOfScalar(ThreadID, 3064 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3065 3066 return ThreadIDTemp; 3067 } 3068 3069 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3070 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3071 SmallString<256> Buffer; 3072 llvm::raw_svector_ostream Out(Buffer); 3073 Out << Name; 3074 StringRef RuntimeName = Out.str(); 3075 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3076 if (Elem.second) { 3077 assert(Elem.second->getType()->getPointerElementType() == Ty && 3078 "OMP internal variable has different type than requested"); 3079 return &*Elem.second; 3080 } 3081 3082 return Elem.second = new llvm::GlobalVariable( 3083 CGM.getModule(), Ty, /*IsConstant*/ false, 3084 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3085 Elem.first(), /*InsertBefore=*/nullptr, 3086 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3087 } 3088 3089 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3090 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3091 std::string Name = getName({Prefix, "var"}); 3092 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3093 } 3094 3095 namespace { 3096 /// Common pre(post)-action for different OpenMP constructs. 
3097 class CommonActionTy final : public PrePostActionTy { 3098 llvm::FunctionCallee EnterCallee; 3099 ArrayRef<llvm::Value *> EnterArgs; 3100 llvm::FunctionCallee ExitCallee; 3101 ArrayRef<llvm::Value *> ExitArgs; 3102 bool Conditional; 3103 llvm::BasicBlock *ContBlock = nullptr; 3104 3105 public: 3106 CommonActionTy(llvm::FunctionCallee EnterCallee, 3107 ArrayRef<llvm::Value *> EnterArgs, 3108 llvm::FunctionCallee ExitCallee, 3109 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3110 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3111 ExitArgs(ExitArgs), Conditional(Conditional) {} 3112 void Enter(CodeGenFunction &CGF) override { 3113 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3114 if (Conditional) { 3115 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3116 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3117 ContBlock = CGF.createBasicBlock("omp_if.end"); 3118 // Generate the branch (If-stmt) 3119 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3120 CGF.EmitBlock(ThenBlock); 3121 } 3122 } 3123 void Done(CodeGenFunction &CGF) { 3124 // Emit the rest of blocks/branches 3125 CGF.EmitBranch(ContBlock); 3126 CGF.EmitBlock(ContBlock, true); 3127 } 3128 void Exit(CodeGenFunction &CGF) override { 3129 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3130 } 3131 }; 3132 } // anonymous namespace 3133 3134 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3135 StringRef CriticalName, 3136 const RegionCodeGenTy &CriticalOpGen, 3137 SourceLocation Loc, const Expr *Hint) { 3138 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3139 // CriticalOpGen(); 3140 // __kmpc_end_critical(ident_t *, gtid, Lock); 3141 // Prepare arguments and build a call to __kmpc_critical 3142 if (!CGF.HaveInsertPoint()) 3143 return; 3144 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3145 getCriticalRegionLock(CriticalName)}; 3146 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3147 std::end(Args)); 3148 if (Hint) { 3149 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3150 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3151 } 3152 CommonActionTy Action( 3153 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3154 : OMPRTL__kmpc_critical), 3155 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3156 CriticalOpGen.setAction(Action); 3157 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3158 } 3159 3160 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3161 const RegionCodeGenTy &MasterOpGen, 3162 SourceLocation Loc) { 3163 if (!CGF.HaveInsertPoint()) 3164 return; 3165 // if(__kmpc_master(ident_t *, gtid)) { 3166 // MasterOpGen(); 3167 // __kmpc_end_master(ident_t *, gtid); 3168 // } 3169 // Prepare arguments and build a call to __kmpc_master 3170 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3171 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3172 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3173 /*Conditional=*/true); 3174 MasterOpGen.setAction(Action); 3175 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3176 Action.Done(CGF); 3177 } 3178 3179 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3180 SourceLocation Loc) { 3181 if (!CGF.HaveInsertPoint()) 3182 return; 3183 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3184 llvm::Value *Args[] = { 3185 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3186 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3187 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 3188 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3189 Region->emitUntiedSwitch(CGF); 3190 } 3191 3192 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3193 const RegionCodeGenTy &TaskgroupOpGen, 3194 SourceLocation Loc) { 3195 if 
(!CGF.HaveInsertPoint()) 3196 return; 3197 // __kmpc_taskgroup(ident_t *, gtid); 3198 // TaskgroupOpGen(); 3199 // __kmpc_end_taskgroup(ident_t *, gtid); 3200 // Prepare arguments and build a call to __kmpc_taskgroup 3201 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3202 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3203 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3204 Args); 3205 TaskgroupOpGen.setAction(Action); 3206 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3207 } 3208 3209 /// Given an array of pointers to variables, project the address of a 3210 /// given variable. 3211 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3212 unsigned Index, const VarDecl *Var) { 3213 // Pull out the pointer to the variable. 3214 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3215 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3216 3217 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3218 Addr = CGF.Builder.CreateElementBitCast( 3219 Addr, CGF.ConvertTypeForMem(Var->getType())); 3220 return Addr; 3221 } 3222 3223 static llvm::Value *emitCopyprivateCopyFunction( 3224 CodeGenModule &CGM, llvm::Type *ArgsType, 3225 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3226 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3227 SourceLocation Loc) { 3228 ASTContext &C = CGM.getContext(); 3229 // void copy_func(void *LHSArg, void *RHSArg); 3230 FunctionArgList Args; 3231 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3232 ImplicitParamDecl::Other); 3233 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3234 ImplicitParamDecl::Other); 3235 Args.push_back(&LHSArg); 3236 Args.push_back(&RHSArg); 3237 const auto &CGFI = 3238 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3239 std::string Name = 3240 
CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3241 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3242 llvm::GlobalValue::InternalLinkage, Name, 3243 &CGM.getModule()); 3244 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3245 Fn->setDoesNotRecurse(); 3246 CodeGenFunction CGF(CGM); 3247 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3248 // Dest = (void*[n])(LHSArg); 3249 // Src = (void*[n])(RHSArg); 3250 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3251 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3252 ArgsType), CGF.getPointerAlign()); 3253 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3254 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3255 ArgsType), CGF.getPointerAlign()); 3256 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3257 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3258 // ... 3259 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3260 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3261 const auto *DestVar = 3262 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3263 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3264 3265 const auto *SrcVar = 3266 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3267 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3268 3269 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3270 QualType Type = VD->getType(); 3271 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3272 } 3273 CGF.FinishFunction(); 3274 return Fn; 3275 } 3276 3277 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3278 const RegionCodeGenTy &SingleOpGen, 3279 SourceLocation Loc, 3280 ArrayRef<const Expr *> CopyprivateVars, 3281 ArrayRef<const Expr *> SrcExprs, 3282 ArrayRef<const Expr *> DstExprs, 3283 ArrayRef<const Expr *> AssignmentOps) { 3284 if (!CGF.HaveInsertPoint()) 3285 return; 3286 assert(CopyprivateVars.size() == 
SrcExprs.size() && 3287 CopyprivateVars.size() == DstExprs.size() && 3288 CopyprivateVars.size() == AssignmentOps.size()); 3289 ASTContext &C = CGM.getContext(); 3290 // int32 did_it = 0; 3291 // if(__kmpc_single(ident_t *, gtid)) { 3292 // SingleOpGen(); 3293 // __kmpc_end_single(ident_t *, gtid); 3294 // did_it = 1; 3295 // } 3296 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3297 // <copy_func>, did_it); 3298 3299 Address DidIt = Address::invalid(); 3300 if (!CopyprivateVars.empty()) { 3301 // int32 did_it = 0; 3302 QualType KmpInt32Ty = 3303 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3304 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3305 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3306 } 3307 // Prepare arguments and build a call to __kmpc_single 3308 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3309 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3310 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3311 /*Conditional=*/true); 3312 SingleOpGen.setAction(Action); 3313 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3314 if (DidIt.isValid()) { 3315 // did_it = 1; 3316 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3317 } 3318 Action.Done(CGF); 3319 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3320 // <copy_func>, did_it); 3321 if (DidIt.isValid()) { 3322 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3323 QualType CopyprivateArrayTy = 3324 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 3325 /*IndexTypeQuals=*/0); 3326 // Create a list of all private variables for copyprivate. 
3327 Address CopyprivateList = 3328 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3329 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3330 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3331 CGF.Builder.CreateStore( 3332 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3333 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 3334 Elem); 3335 } 3336 // Build function that copies private values from single region to all other 3337 // threads in the corresponding parallel region. 3338 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3339 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3340 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3341 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3342 Address CL = 3343 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3344 CGF.VoidPtrTy); 3345 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3346 llvm::Value *Args[] = { 3347 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3348 getThreadID(CGF, Loc), // i32 <gtid> 3349 BufSize, // size_t <buf_size> 3350 CL.getPointer(), // void *<copyprivate list> 3351 CpyFn, // void (*) (void *, void *) <copy_func> 3352 DidItVal // i32 did_it 3353 }; 3354 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3355 } 3356 } 3357 3358 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3359 const RegionCodeGenTy &OrderedOpGen, 3360 SourceLocation Loc, bool IsThreads) { 3361 if (!CGF.HaveInsertPoint()) 3362 return; 3363 // __kmpc_ordered(ident_t *, gtid); 3364 // OrderedOpGen(); 3365 // __kmpc_end_ordered(ident_t *, gtid); 3366 // Prepare arguments and build a call to __kmpc_ordered 3367 if (IsThreads) { 3368 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3369 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3370 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3371 Args); 3372 
OrderedOpGen.setAction(Action); 3373 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3374 return; 3375 } 3376 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3377 } 3378 3379 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3380 unsigned Flags; 3381 if (Kind == OMPD_for) 3382 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3383 else if (Kind == OMPD_sections) 3384 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3385 else if (Kind == OMPD_single) 3386 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3387 else if (Kind == OMPD_barrier) 3388 Flags = OMP_IDENT_BARRIER_EXPL; 3389 else 3390 Flags = OMP_IDENT_BARRIER_IMPL; 3391 return Flags; 3392 } 3393 3394 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3395 CodeGenFunction &CGF, const OMPLoopDirective &S, 3396 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3397 // Check if the loop directive is actually a doacross loop directive. In this 3398 // case choose static, 1 schedule. 3399 if (llvm::any_of( 3400 S.getClausesOfKind<OMPOrderedClause>(), 3401 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3402 ScheduleKind = OMPC_SCHEDULE_static; 3403 // Chunk size is 1 in this case. 
3404 llvm::APInt ChunkSize(32, 1); 3405 ChunkExpr = IntegerLiteral::Create( 3406 CGF.getContext(), ChunkSize, 3407 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3408 SourceLocation()); 3409 } 3410 } 3411 3412 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3413 OpenMPDirectiveKind Kind, bool EmitChecks, 3414 bool ForceSimpleCall) { 3415 if (!CGF.HaveInsertPoint()) 3416 return; 3417 // Build call __kmpc_cancel_barrier(loc, thread_id); 3418 // Build call __kmpc_barrier(loc, thread_id); 3419 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3420 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3421 // thread_id); 3422 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3423 getThreadID(CGF, Loc)}; 3424 if (auto *OMPRegionInfo = 3425 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3426 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3427 llvm::Value *Result = CGF.EmitRuntimeCall( 3428 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3429 if (EmitChecks) { 3430 // if (__kmpc_cancel_barrier()) { 3431 // exit from construct; 3432 // } 3433 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3434 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3435 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3436 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3437 CGF.EmitBlock(ExitBB); 3438 // exit from construct; 3439 CodeGenFunction::JumpDest CancelDestination = 3440 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3441 CGF.EmitBranchThroughCleanup(CancelDestination); 3442 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3443 } 3444 return; 3445 } 3446 } 3447 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3448 } 3449 3450 /// Map the OpenMP loop schedule to the runtime enumeration. 
3451 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3452 bool Chunked, bool Ordered) { 3453 switch (ScheduleKind) { 3454 case OMPC_SCHEDULE_static: 3455 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3456 : (Ordered ? OMP_ord_static : OMP_sch_static); 3457 case OMPC_SCHEDULE_dynamic: 3458 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3459 case OMPC_SCHEDULE_guided: 3460 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3461 case OMPC_SCHEDULE_runtime: 3462 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3463 case OMPC_SCHEDULE_auto: 3464 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3465 case OMPC_SCHEDULE_unknown: 3466 assert(!Chunked && "chunk was specified but schedule kind not known"); 3467 return Ordered ? OMP_ord_static : OMP_sch_static; 3468 } 3469 llvm_unreachable("Unexpected runtime schedule"); 3470 } 3471 3472 /// Map the OpenMP distribute schedule to the runtime enumeration. 3473 static OpenMPSchedType 3474 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3475 // only static is allowed for dist_schedule 3476 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3477 } 3478 3479 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3480 bool Chunked) const { 3481 OpenMPSchedType Schedule = 3482 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3483 return Schedule == OMP_sch_static; 3484 } 3485 3486 bool CGOpenMPRuntime::isStaticNonchunked( 3487 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3488 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3489 return Schedule == OMP_dist_sch_static; 3490 } 3491 3492 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3493 bool Chunked) const { 3494 OpenMPSchedType Schedule = 3495 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3496 return Schedule == OMP_sch_static_chunked; 3497 } 3498 3499 bool CGOpenMPRuntime::isStaticChunked( 3500 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3501 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3502 return Schedule == OMP_dist_sch_static_chunked; 3503 } 3504 3505 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3506 OpenMPSchedType Schedule = 3507 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3508 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3509 return Schedule != OMP_sch_static; 3510 } 3511 3512 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 3513 OpenMPScheduleClauseModifier M1, 3514 OpenMPScheduleClauseModifier M2) { 3515 int Modifier = 0; 3516 switch (M1) { 3517 case OMPC_SCHEDULE_MODIFIER_monotonic: 3518 Modifier = OMP_sch_modifier_monotonic; 3519 break; 3520 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3521 Modifier = OMP_sch_modifier_nonmonotonic; 3522 break; 3523 case OMPC_SCHEDULE_MODIFIER_simd: 3524 if (Schedule == OMP_sch_static_chunked) 3525 Schedule = OMP_sch_static_balanced_chunked; 3526 break; 3527 case OMPC_SCHEDULE_MODIFIER_last: 3528 
case OMPC_SCHEDULE_MODIFIER_unknown: 3529 break; 3530 } 3531 switch (M2) { 3532 case OMPC_SCHEDULE_MODIFIER_monotonic: 3533 Modifier = OMP_sch_modifier_monotonic; 3534 break; 3535 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3536 Modifier = OMP_sch_modifier_nonmonotonic; 3537 break; 3538 case OMPC_SCHEDULE_MODIFIER_simd: 3539 if (Schedule == OMP_sch_static_chunked) 3540 Schedule = OMP_sch_static_balanced_chunked; 3541 break; 3542 case OMPC_SCHEDULE_MODIFIER_last: 3543 case OMPC_SCHEDULE_MODIFIER_unknown: 3544 break; 3545 } 3546 return Schedule | Modifier; 3547 } 3548 3549 void CGOpenMPRuntime::emitForDispatchInit( 3550 CodeGenFunction &CGF, SourceLocation Loc, 3551 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3552 bool Ordered, const DispatchRTInput &DispatchValues) { 3553 if (!CGF.HaveInsertPoint()) 3554 return; 3555 OpenMPSchedType Schedule = getRuntimeSchedule( 3556 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3557 assert(Ordered || 3558 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3559 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3560 Schedule != OMP_sch_static_balanced_chunked)); 3561 // Call __kmpc_dispatch_init( 3562 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3563 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3564 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3565 3566 // If the Chunk was not specified in the clause - use default value 1. 3567 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3568 : CGF.Builder.getIntN(IVSize, 1); 3569 llvm::Value *Args[] = { 3570 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3571 CGF.Builder.getInt32(addMonoNonMonoModifier( 3572 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3573 DispatchValues.LB, // Lower 3574 DispatchValues.UB, // Upper 3575 CGF.Builder.getIntN(IVSize, 1), // Stride 3576 Chunk // Chunk 3577 }; 3578 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3579 } 3580 3581 static void emitForStaticInitCall( 3582 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3583 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3584 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3585 const CGOpenMPRuntime::StaticRTInput &Values) { 3586 if (!CGF.HaveInsertPoint()) 3587 return; 3588 3589 assert(!Values.Ordered); 3590 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3591 Schedule == OMP_sch_static_balanced_chunked || 3592 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3593 Schedule == OMP_dist_sch_static || 3594 Schedule == OMP_dist_sch_static_chunked); 3595 3596 // Call __kmpc_for_static_init( 3597 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3598 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3599 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3600 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3601 llvm::Value *Chunk = Values.Chunk; 3602 if (Chunk == nullptr) { 3603 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3604 Schedule == OMP_dist_sch_static) && 3605 "expected static non-chunked schedule"); 3606 // If the Chunk was not specified in the clause - use default value 1. 
3607 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3608 } else { 3609 assert((Schedule == OMP_sch_static_chunked || 3610 Schedule == OMP_sch_static_balanced_chunked || 3611 Schedule == OMP_ord_static_chunked || 3612 Schedule == OMP_dist_sch_static_chunked) && 3613 "expected static chunked schedule"); 3614 } 3615 llvm::Value *Args[] = { 3616 UpdateLocation, 3617 ThreadId, 3618 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3619 M2)), // Schedule type 3620 Values.IL.getPointer(), // &isLastIter 3621 Values.LB.getPointer(), // &LB 3622 Values.UB.getPointer(), // &UB 3623 Values.ST.getPointer(), // &Stride 3624 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3625 Chunk // Chunk 3626 }; 3627 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3628 } 3629 3630 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3631 SourceLocation Loc, 3632 OpenMPDirectiveKind DKind, 3633 const OpenMPScheduleTy &ScheduleKind, 3634 const StaticRTInput &Values) { 3635 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3636 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3637 assert(isOpenMPWorksharingDirective(DKind) && 3638 "Expected loop-based or sections-based directive."); 3639 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3640 isOpenMPLoopDirective(DKind) 3641 ? 
OMP_IDENT_WORK_LOOP 3642 : OMP_IDENT_WORK_SECTIONS); 3643 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3644 llvm::FunctionCallee StaticInitFunction = 3645 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3646 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3647 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3648 } 3649 3650 void CGOpenMPRuntime::emitDistributeStaticInit( 3651 CodeGenFunction &CGF, SourceLocation Loc, 3652 OpenMPDistScheduleClauseKind SchedKind, 3653 const CGOpenMPRuntime::StaticRTInput &Values) { 3654 OpenMPSchedType ScheduleNum = 3655 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3656 llvm::Value *UpdatedLocation = 3657 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3658 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3659 llvm::FunctionCallee StaticInitFunction = 3660 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3661 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3662 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3663 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3664 } 3665 3666 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3667 SourceLocation Loc, 3668 OpenMPDirectiveKind DKind) { 3669 if (!CGF.HaveInsertPoint()) 3670 return; 3671 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3672 llvm::Value *Args[] = { 3673 emitUpdateLocation(CGF, Loc, 3674 isOpenMPDistributeDirective(DKind) 3675 ? OMP_IDENT_WORK_DISTRIBUTE 3676 : isOpenMPLoopDirective(DKind) 3677 ? 
OMP_IDENT_WORK_LOOP 3678 : OMP_IDENT_WORK_SECTIONS), 3679 getThreadID(CGF, Loc)}; 3680 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3681 Args); 3682 } 3683 3684 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3685 SourceLocation Loc, 3686 unsigned IVSize, 3687 bool IVSigned) { 3688 if (!CGF.HaveInsertPoint()) 3689 return; 3690 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3691 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3692 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3693 } 3694 3695 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3696 SourceLocation Loc, unsigned IVSize, 3697 bool IVSigned, Address IL, 3698 Address LB, Address UB, 3699 Address ST) { 3700 // Call __kmpc_dispatch_next( 3701 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3702 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3703 // kmp_int[32|64] *p_stride); 3704 llvm::Value *Args[] = { 3705 emitUpdateLocation(CGF, Loc), 3706 getThreadID(CGF, Loc), 3707 IL.getPointer(), // &isLastIter 3708 LB.getPointer(), // &Lower 3709 UB.getPointer(), // &Upper 3710 ST.getPointer() // &Stride 3711 }; 3712 llvm::Value *Call = 3713 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3714 return CGF.EmitScalarConversion( 3715 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3716 CGF.getContext().BoolTy, Loc); 3717 } 3718 3719 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3720 llvm::Value *NumThreads, 3721 SourceLocation Loc) { 3722 if (!CGF.HaveInsertPoint()) 3723 return; 3724 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3725 llvm::Value *Args[] = { 3726 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3727 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3728 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3729 Args); 3730 
} 3731 3732 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3733 OpenMPProcBindClauseKind ProcBind, 3734 SourceLocation Loc) { 3735 if (!CGF.HaveInsertPoint()) 3736 return; 3737 // Constants for proc bind value accepted by the runtime. 3738 enum ProcBindTy { 3739 ProcBindFalse = 0, 3740 ProcBindTrue, 3741 ProcBindMaster, 3742 ProcBindClose, 3743 ProcBindSpread, 3744 ProcBindIntel, 3745 ProcBindDefault 3746 } RuntimeProcBind; 3747 switch (ProcBind) { 3748 case OMPC_PROC_BIND_master: 3749 RuntimeProcBind = ProcBindMaster; 3750 break; 3751 case OMPC_PROC_BIND_close: 3752 RuntimeProcBind = ProcBindClose; 3753 break; 3754 case OMPC_PROC_BIND_spread: 3755 RuntimeProcBind = ProcBindSpread; 3756 break; 3757 case OMPC_PROC_BIND_unknown: 3758 llvm_unreachable("Unsupported proc_bind value."); 3759 } 3760 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3761 llvm::Value *Args[] = { 3762 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3763 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3764 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3765 } 3766 3767 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3768 SourceLocation Loc) { 3769 if (!CGF.HaveInsertPoint()) 3770 return; 3771 // Build call void __kmpc_flush(ident_t *loc) 3772 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3773 emitUpdateLocation(CGF, Loc)); 3774 } 3775 3776 namespace { 3777 /// Indexes of fields for type kmp_task_t. 3778 enum KmpTaskTFields { 3779 /// List of shared variables. 3780 KmpTaskTShareds, 3781 /// Task routine. 3782 KmpTaskTRoutine, 3783 /// Partition id for the untied tasks. 3784 KmpTaskTPartId, 3785 /// Function with call of destructors for private variables. 3786 Data1, 3787 /// Task priority. 3788 Data2, 3789 /// (Taskloops only) Lower bound. 3790 KmpTaskTLowerBound, 3791 /// (Taskloops only) Upper bound. 
3792 KmpTaskTUpperBound, 3793 /// (Taskloops only) Stride. 3794 KmpTaskTStride, 3795 /// (Taskloops only) Is last iteration flag. 3796 KmpTaskTLastIter, 3797 /// (Taskloops only) Reduction data. 3798 KmpTaskTReductions, 3799 }; 3800 } // anonymous namespace 3801 3802 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3803 return OffloadEntriesTargetRegion.empty() && 3804 OffloadEntriesDeviceGlobalVar.empty(); 3805 } 3806 3807 /// Initialize target region entry. 3808 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3809 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3810 StringRef ParentName, unsigned LineNum, 3811 unsigned Order) { 3812 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3813 "only required for the device " 3814 "code generation."); 3815 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3816 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3817 OMPTargetRegionEntryTargetRegion); 3818 ++OffloadingEntriesNum; 3819 } 3820 3821 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3822 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3823 StringRef ParentName, unsigned LineNum, 3824 llvm::Constant *Addr, llvm::Constant *ID, 3825 OMPTargetRegionEntryKind Flags) { 3826 // If we are emitting code for a target, the entry is already initialized, 3827 // only has to be registered. 
3828 if (CGM.getLangOpts().OpenMPIsDevice) { 3829 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3830 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3831 DiagnosticsEngine::Error, 3832 "Unable to find target region on line '%0' in the device code."); 3833 CGM.getDiags().Report(DiagID) << LineNum; 3834 return; 3835 } 3836 auto &Entry = 3837 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3838 assert(Entry.isValid() && "Entry not initialized!"); 3839 Entry.setAddress(Addr); 3840 Entry.setID(ID); 3841 Entry.setFlags(Flags); 3842 } else { 3843 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3844 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3845 ++OffloadingEntriesNum; 3846 } 3847 } 3848 3849 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3850 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3851 unsigned LineNum) const { 3852 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3853 if (PerDevice == OffloadEntriesTargetRegion.end()) 3854 return false; 3855 auto PerFile = PerDevice->second.find(FileID); 3856 if (PerFile == PerDevice->second.end()) 3857 return false; 3858 auto PerParentName = PerFile->second.find(ParentName); 3859 if (PerParentName == PerFile->second.end()) 3860 return false; 3861 auto PerLine = PerParentName->second.find(LineNum); 3862 if (PerLine == PerParentName->second.end()) 3863 return false; 3864 // Fail if this entry is already registered. 3865 if (PerLine->second.getAddress() || PerLine->second.getID()) 3866 return false; 3867 return true; 3868 } 3869 3870 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3871 const OffloadTargetRegionEntryInfoActTy &Action) { 3872 // Scan all target region entries and perform the provided action. 
  // Nesting order of the map: device -> file -> parent name -> line.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Initialize device global variable entry.
///
/// Records an entry for \p Name with the given \p Order and \p Flags (if no
/// entry with that name exists yet) and bumps the entry counter. Asserts that
/// code is being generated for the device.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    // Device side: the entry is expected to have been initialized already.
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      // Already has an address: only fill in size/linkage if still unset.
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    // Host side: update an existing entry, otherwise fall through to create a
    // new one.
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // No entry yet: create one ordered by the current entry count.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action with the name and entry info of every recorded device
/// global variable entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

llvm::Function *
CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
    return nullptr;

  llvm::Module &M = CGM.getModule();
  ASTContext &C = CGM.getContext();

  // Get list of devices we care about
  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;

  // We should be creating an offloading descriptor only if there are devices
  // specified.
  assert(!Devices.empty() && "No OpenMP offloading devices??");

  // Create the external variables that will point to the begin and end of the
  // host entries section. These will be defined by the linker.
3958 llvm::Type *OffloadEntryTy = 3959 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3960 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); 3961 auto *HostEntriesBegin = new llvm::GlobalVariable( 3962 M, OffloadEntryTy, /*isConstant=*/true, 3963 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3964 EntriesBeginName); 3965 std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); 3966 auto *HostEntriesEnd = 3967 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, 3968 llvm::GlobalValue::ExternalLinkage, 3969 /*Initializer=*/nullptr, EntriesEndName); 3970 3971 // Create all device images 3972 auto *DeviceImageTy = cast<llvm::StructType>( 3973 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3974 ConstantInitBuilder DeviceImagesBuilder(CGM); 3975 ConstantArrayBuilder DeviceImagesEntries = 3976 DeviceImagesBuilder.beginArray(DeviceImageTy); 3977 3978 for (const llvm::Triple &Device : Devices) { 3979 StringRef T = Device.getTriple(); 3980 std::string BeginName = getName({"omp_offloading", "img_start", ""}); 3981 auto *ImgBegin = new llvm::GlobalVariable( 3982 M, CGM.Int8Ty, /*isConstant=*/true, 3983 llvm::GlobalValue::ExternalWeakLinkage, 3984 /*Initializer=*/nullptr, Twine(BeginName).concat(T)); 3985 std::string EndName = getName({"omp_offloading", "img_end", ""}); 3986 auto *ImgEnd = new llvm::GlobalVariable( 3987 M, CGM.Int8Ty, /*isConstant=*/true, 3988 llvm::GlobalValue::ExternalWeakLinkage, 3989 /*Initializer=*/nullptr, Twine(EndName).concat(T)); 3990 3991 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 3992 HostEntriesEnd}; 3993 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 3994 DeviceImagesEntries); 3995 } 3996 3997 // Create device images global array. 
3998 std::string ImagesName = getName({"omp_offloading", "device_images"}); 3999 llvm::GlobalVariable *DeviceImages = 4000 DeviceImagesEntries.finishAndCreateGlobal(ImagesName, 4001 CGM.getPointerAlign(), 4002 /*isConstant=*/true); 4003 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4004 4005 // This is a Zero array to be used in the creation of the constant expressions 4006 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 4007 llvm::Constant::getNullValue(CGM.Int32Ty)}; 4008 4009 // Create the target region descriptor. 4010 llvm::Constant *Data[] = { 4011 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 4012 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 4013 DeviceImages, Index), 4014 HostEntriesBegin, HostEntriesEnd}; 4015 std::string Descriptor = getName({"omp_offloading", "descriptor"}); 4016 llvm::GlobalVariable *Desc = createGlobalStruct( 4017 CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); 4018 4019 // Emit code to register or unregister the descriptor at execution 4020 // startup or closing, respectively. 4021 4022 llvm::Function *UnRegFn; 4023 { 4024 FunctionArgList Args; 4025 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 4026 Args.push_back(&DummyPtr); 4027 4028 CodeGenFunction CGF(CGM); 4029 // Disable debug info for global (de-)initializer because they are not part 4030 // of some particular construct. 
4031 CGF.disableDebugInfo(); 4032 const auto &FI = 4033 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4034 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 4035 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); 4036 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); 4037 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 4038 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 4039 Desc); 4040 CGF.FinishFunction(); 4041 } 4042 llvm::Function *RegFn; 4043 { 4044 CodeGenFunction CGF(CGM); 4045 // Disable debug info for global (de-)initializer because they are not part 4046 // of some particular construct. 4047 CGF.disableDebugInfo(); 4048 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 4049 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 4050 4051 // Encode offload target triples into the registration function name. It 4052 // will serve as a comdat key for the registration/unregistration code for 4053 // this particular combination of offloading targets. 4054 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); 4055 RegFnNameParts[0] = "omp_offloading"; 4056 RegFnNameParts[1] = "descriptor_reg"; 4057 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), 4058 [](const llvm::Triple &T) -> const std::string& { 4059 return T.getTriple(); 4060 }); 4061 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); 4062 std::string Descriptor = getName(RegFnNameParts); 4063 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); 4064 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 4065 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 4066 // Create a variable to drive the registration and unregistration of the 4067 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
4068 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 4069 SourceLocation(), nullptr, C.CharTy, 4070 ImplicitParamDecl::Other); 4071 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 4072 CGF.FinishFunction(); 4073 } 4074 if (CGM.supportsCOMDAT()) { 4075 // It is sufficient to call registration function only once, so create a 4076 // COMDAT group for registration/unregistration functions and associated 4077 // data. That would reduce startup time and code size. Registration 4078 // function serves as a COMDAT group key. 4079 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 4080 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 4081 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 4082 RegFn->setComdat(ComdatKey); 4083 UnRegFn->setComdat(ComdatKey); 4084 DeviceImages->setComdat(ComdatKey); 4085 Desc->setComdat(ComdatKey); 4086 } 4087 return RegFn; 4088 } 4089 4090 void CGOpenMPRuntime::createOffloadEntry( 4091 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4092 llvm::GlobalValue::LinkageTypes Linkage) { 4093 StringRef Name = Addr->getName(); 4094 llvm::Module &M = CGM.getModule(); 4095 llvm::LLVMContext &C = M.getContext(); 4096 4097 // Create constant string with the name. 
4098 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4099 4100 std::string StringName = getName({"omp_offloading", "entry_name"}); 4101 auto *Str = new llvm::GlobalVariable( 4102 M, StrPtrInit->getType(), /*isConstant=*/true, 4103 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4104 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4105 4106 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4107 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4108 llvm::ConstantInt::get(CGM.SizeTy, Size), 4109 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4110 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4111 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4112 llvm::GlobalVariable *Entry = createGlobalStruct( 4113 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4114 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4115 4116 // The entry has to be created in the section the linker expects it to be. 4117 std::string Section = getName({"omp_offloading", "entries"}); 4118 Entry->setSection(Section); 4119 } 4120 4121 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4122 // Emit the offloading entries and metadata so that the device codegen side 4123 // can easily figure out what to emit. The produced metadata looks like 4124 // this: 4125 // 4126 // !omp_offload.info = !{!1, ...} 4127 // 4128 // Right now we only generate metadata for function that contain target 4129 // regions. 4130 4131 // If we do not have entries, we don't need to do anything. 
4132 if (OffloadEntriesInfoManager.empty()) 4133 return; 4134 4135 llvm::Module &M = CGM.getModule(); 4136 llvm::LLVMContext &C = M.getContext(); 4137 SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 4138 OrderedEntries(OffloadEntriesInfoManager.size()); 4139 llvm::SmallVector<StringRef, 16> ParentFunctions( 4140 OffloadEntriesInfoManager.size()); 4141 4142 // Auxiliary methods to create metadata values and strings. 4143 auto &&GetMDInt = [this](unsigned V) { 4144 return llvm::ConstantAsMetadata::get( 4145 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4146 }; 4147 4148 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4149 4150 // Create the offloading info metadata node. 4151 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4152 4153 // Create function that emits metadata for each target region entry; 4154 auto &&TargetRegionMetadataEmitter = 4155 [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( 4156 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4157 unsigned Line, 4158 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4159 // Generate metadata for target regions. Each entry of this metadata 4160 // contains: 4161 // - Entry 0 -> Kind of this type of metadata (0). 4162 // - Entry 1 -> Device ID of the file where the entry was identified. 4163 // - Entry 2 -> File ID of the file where the entry was identified. 4164 // - Entry 3 -> Mangled name of the function where the entry was 4165 // identified. 4166 // - Entry 4 -> Line in the file where the entry was identified. 4167 // - Entry 5 -> Order the entry was created. 4168 // The first element of the metadata node is the kind. 4169 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4170 GetMDInt(FileID), GetMDString(ParentName), 4171 GetMDInt(Line), GetMDInt(E.getOrder())}; 4172 4173 // Save this entry in the right position of the ordered entries array. 
4174 OrderedEntries[E.getOrder()] = &E; 4175 ParentFunctions[E.getOrder()] = ParentName; 4176 4177 // Add metadata to the named metadata node. 4178 MD->addOperand(llvm::MDNode::get(C, Ops)); 4179 }; 4180 4181 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4182 TargetRegionMetadataEmitter); 4183 4184 // Create function that emits metadata for each device global variable entry; 4185 auto &&DeviceGlobalVarMetadataEmitter = 4186 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4187 MD](StringRef MangledName, 4188 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4189 &E) { 4190 // Generate metadata for global variables. Each entry of this metadata 4191 // contains: 4192 // - Entry 0 -> Kind of this type of metadata (1). 4193 // - Entry 1 -> Mangled name of the variable. 4194 // - Entry 2 -> Declare target kind. 4195 // - Entry 3 -> Order the entry was created. 4196 // The first element of the metadata node is the kind. 4197 llvm::Metadata *Ops[] = { 4198 GetMDInt(E.getKind()), GetMDString(MangledName), 4199 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4200 4201 // Save this entry in the right position of the ordered entries array. 4202 OrderedEntries[E.getOrder()] = &E; 4203 4204 // Add metadata to the named metadata node. 4205 MD->addOperand(llvm::MDNode::get(C, Ops)); 4206 }; 4207 4208 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4209 DeviceGlobalVarMetadataEmitter); 4210 4211 for (const auto *E : OrderedEntries) { 4212 assert(E && "All ordered entries must exist!"); 4213 if (const auto *CE = 4214 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4215 E)) { 4216 if (!CE->getID() || !CE->getAddress()) { 4217 // Do not blame the entry if the parent funtion is not emitted. 
4218 StringRef FnName = ParentFunctions[CE->getOrder()]; 4219 if (!CGM.GetGlobalValue(FnName)) 4220 continue; 4221 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4222 DiagnosticsEngine::Error, 4223 "Offloading entry for target region is incorrect: either the " 4224 "address or the ID is invalid."); 4225 CGM.getDiags().Report(DiagID); 4226 continue; 4227 } 4228 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4229 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4230 } else if (const auto *CE = 4231 dyn_cast<OffloadEntriesInfoManagerTy:: 4232 OffloadEntryInfoDeviceGlobalVar>(E)) { 4233 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4234 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4235 CE->getFlags()); 4236 switch (Flags) { 4237 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4238 if (CGM.getLangOpts().OpenMPIsDevice && 4239 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4240 continue; 4241 if (!CE->getAddress()) { 4242 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4243 DiagnosticsEngine::Error, 4244 "Offloading entry for declare target variable is incorrect: the " 4245 "address is invalid."); 4246 CGM.getDiags().Report(DiagID); 4247 continue; 4248 } 4249 // The vaiable has no definition - no need to add the entry. 
4250 if (CE->getVarSize().isZero()) 4251 continue; 4252 break; 4253 } 4254 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4255 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4256 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4257 "Declaret target link address is set."); 4258 if (CGM.getLangOpts().OpenMPIsDevice) 4259 continue; 4260 if (!CE->getAddress()) { 4261 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4262 DiagnosticsEngine::Error, 4263 "Offloading entry for declare target variable is incorrect: the " 4264 "address is invalid."); 4265 CGM.getDiags().Report(DiagID); 4266 continue; 4267 } 4268 break; 4269 } 4270 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4271 CE->getVarSize().getQuantity(), Flags, 4272 CE->getLinkage()); 4273 } else { 4274 llvm_unreachable("Unsupported entry kind."); 4275 } 4276 } 4277 } 4278 4279 /// Loads all the offload entries information from the host IR 4280 /// metadata. 4281 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4282 // If we are in target mode, load the metadata from the host IR. This code has 4283 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4284 4285 if (!CGM.getLangOpts().OpenMPIsDevice) 4286 return; 4287 4288 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4289 return; 4290 4291 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4292 if (auto EC = Buf.getError()) { 4293 CGM.getDiags().Report(diag::err_cannot_open_file) 4294 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4295 return; 4296 } 4297 4298 llvm::LLVMContext C; 4299 auto ME = expectedToErrorOrAndEmitErrors( 4300 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4301 4302 if (auto EC = ME.getError()) { 4303 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4304 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4305 CGM.getDiags().Report(DiagID) 4306 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4307 return; 4308 } 4309 4310 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4311 if (!MD) 4312 return; 4313 4314 for (llvm::MDNode *MN : MD->operands()) { 4315 auto &&GetMDInt = [MN](unsigned Idx) { 4316 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4317 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4318 }; 4319 4320 auto &&GetMDString = [MN](unsigned Idx) { 4321 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4322 return V->getString(); 4323 }; 4324 4325 switch (GetMDInt(0)) { 4326 default: 4327 llvm_unreachable("Unexpected metadata!"); 4328 break; 4329 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4330 OffloadingEntryInfoTargetRegion: 4331 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4332 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4333 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4334 /*Order=*/GetMDInt(5)); 4335 break; 4336 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4337 OffloadingEntryInfoDeviceGlobalVar: 4338 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4339 /*MangledName=*/GetMDString(1), 4340 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4341 /*Flags=*/GetMDInt(2)), 4342 /*Order=*/GetMDInt(3)); 4343 break; 4344 } 4345 } 4346 } 4347 4348 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4349 if (!KmpRoutineEntryPtrTy) { 4350 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4351 ASTContext &C = CGM.getContext(); 4352 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4353 FunctionProtoType::ExtProtoInfo EPI; 4354 KmpRoutineEntryPtrQTy = C.getPointerType( 4355 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4356 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4357 } 4358 } 4359 4360 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4361 // Make sure the type of the entry is already created. This is the type we 4362 // have to create: 4363 // struct __tgt_offload_entry{ 4364 // void *addr; // Pointer to the offload entry info. 4365 // // (function or global) 4366 // char *name; // Name of the function or global. 4367 // size_t size; // Size of the entry info (0 if it a function). 4368 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4369 // int32_t reserved; // Reserved, to use by the runtime library. 
4370 // }; 4371 if (TgtOffloadEntryQTy.isNull()) { 4372 ASTContext &C = CGM.getContext(); 4373 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4374 RD->startDefinition(); 4375 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4376 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4377 addFieldToRecordDecl(C, RD, C.getSizeType()); 4378 addFieldToRecordDecl( 4379 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4380 addFieldToRecordDecl( 4381 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4382 RD->completeDefinition(); 4383 RD->addAttr(PackedAttr::CreateImplicit(C)); 4384 TgtOffloadEntryQTy = C.getRecordType(RD); 4385 } 4386 return TgtOffloadEntryQTy; 4387 } 4388 4389 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4390 // These are the types we need to build: 4391 // struct __tgt_device_image{ 4392 // void *ImageStart; // Pointer to the target code start. 4393 // void *ImageEnd; // Pointer to the target code end. 4394 // // We also add the host entries to the device image, as it may be useful 4395 // // for the target runtime to have access to that information. 4396 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4397 // // the entries. 4398 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4399 // // entries (non inclusive). 
4400 // }; 4401 if (TgtDeviceImageQTy.isNull()) { 4402 ASTContext &C = CGM.getContext(); 4403 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4404 RD->startDefinition(); 4405 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4406 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4407 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4408 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4409 RD->completeDefinition(); 4410 TgtDeviceImageQTy = C.getRecordType(RD); 4411 } 4412 return TgtDeviceImageQTy; 4413 } 4414 4415 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4416 // struct __tgt_bin_desc{ 4417 // int32_t NumDevices; // Number of devices supported. 4418 // __tgt_device_image *DeviceImages; // Arrays of device images 4419 // // (one per device). 4420 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4421 // // entries. 4422 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4423 // // entries (non inclusive). 
4424 // }; 4425 if (TgtBinaryDescriptorQTy.isNull()) { 4426 ASTContext &C = CGM.getContext(); 4427 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4428 RD->startDefinition(); 4429 addFieldToRecordDecl( 4430 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4431 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4432 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4433 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4434 RD->completeDefinition(); 4435 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4436 } 4437 return TgtBinaryDescriptorQTy; 4438 } 4439 4440 namespace { 4441 struct PrivateHelpersTy { 4442 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4443 const VarDecl *PrivateElemInit) 4444 : Original(Original), PrivateCopy(PrivateCopy), 4445 PrivateElemInit(PrivateElemInit) {} 4446 const VarDecl *Original; 4447 const VarDecl *PrivateCopy; 4448 const VarDecl *PrivateElemInit; 4449 }; 4450 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4451 } // anonymous namespace 4452 4453 static RecordDecl * 4454 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4455 if (!Privates.empty()) { 4456 ASTContext &C = CGM.getContext(); 4457 // Build struct .kmp_privates_t. 
{ 4458 // /* private vars */ 4459 // }; 4460 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4461 RD->startDefinition(); 4462 for (const auto &Pair : Privates) { 4463 const VarDecl *VD = Pair.second.Original; 4464 QualType Type = VD->getType().getNonReferenceType(); 4465 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4466 if (VD->hasAttrs()) { 4467 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4468 E(VD->getAttrs().end()); 4469 I != E; ++I) 4470 FD->addAttr(*I); 4471 } 4472 } 4473 RD->completeDefinition(); 4474 return RD; 4475 } 4476 return nullptr; 4477 } 4478 4479 static RecordDecl * 4480 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4481 QualType KmpInt32Ty, 4482 QualType KmpRoutineEntryPointerQTy) { 4483 ASTContext &C = CGM.getContext(); 4484 // Build struct kmp_task_t { 4485 // void * shareds; 4486 // kmp_routine_entry_t routine; 4487 // kmp_int32 part_id; 4488 // kmp_cmplrdata_t data1; 4489 // kmp_cmplrdata_t data2; 4490 // For taskloops additional fields: 4491 // kmp_uint64 lb; 4492 // kmp_uint64 ub; 4493 // kmp_int64 st; 4494 // kmp_int32 liter; 4495 // void * reductions; 4496 // }; 4497 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4498 UD->startDefinition(); 4499 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4500 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4501 UD->completeDefinition(); 4502 QualType KmpCmplrdataTy = C.getRecordType(UD); 4503 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4504 RD->startDefinition(); 4505 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4506 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4507 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4508 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4509 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4510 if (isOpenMPTaskLoopDirective(Kind)) { 4511 QualType KmpUInt64Ty = 4512 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4513 QualType KmpInt64Ty = 4514 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4515 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4516 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4517 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4518 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4519 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4520 } 4521 RD->completeDefinition(); 4522 return RD; 4523 } 4524 4525 static RecordDecl * 4526 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4527 ArrayRef<PrivateDataTy> Privates) { 4528 ASTContext &C = CGM.getContext(); 4529 // Build struct kmp_task_t_with_privates { 4530 // kmp_task_t task_data; 4531 // .kmp_privates_t. privates; 4532 // }; 4533 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4534 RD->startDefinition(); 4535 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4536 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4537 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4538 RD->completeDefinition(); 4539 return RD; 4540 } 4541 4542 /// Emit a proxy function which accepts kmp_task_t as the second 4543 /// argument. 
4544 /// \code 4545 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4546 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4547 /// For taskloops: 4548 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4549 /// tt->reductions, tt->shareds); 4550 /// return 0; 4551 /// } 4552 /// \endcode 4553 static llvm::Function * 4554 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4555 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4556 QualType KmpTaskTWithPrivatesPtrQTy, 4557 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4558 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4559 llvm::Value *TaskPrivatesMap) { 4560 ASTContext &C = CGM.getContext(); 4561 FunctionArgList Args; 4562 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4563 ImplicitParamDecl::Other); 4564 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4565 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4566 ImplicitParamDecl::Other); 4567 Args.push_back(&GtidArg); 4568 Args.push_back(&TaskTypeArg); 4569 const auto &TaskEntryFnInfo = 4570 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4571 llvm::FunctionType *TaskEntryTy = 4572 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4573 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4574 auto *TaskEntry = llvm::Function::Create( 4575 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4576 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4577 TaskEntry->setDoesNotRecurse(); 4578 CodeGenFunction CGF(CGM); 4579 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4580 Loc, Loc); 4581 4582 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4583 // tt, 4584 // For taskloops: 4585 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4586 // 
tt->task_data.shareds); 4587 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4588 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4589 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4590 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4591 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4592 const auto *KmpTaskTWithPrivatesQTyRD = 4593 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4594 LValue Base = 4595 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4596 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4597 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4598 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4599 llvm::Value *PartidParam = PartIdLVal.getPointer(); 4600 4601 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4602 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4603 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4604 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4605 CGF.ConvertTypeForMem(SharedsPtrTy)); 4606 4607 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4608 llvm::Value *PrivatesParam; 4609 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4610 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4611 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4612 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 4613 } else { 4614 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4615 } 4616 4617 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4618 TaskPrivatesMap, 4619 CGF.Builder 4620 .CreatePointerBitCastOrAddrSpaceCast( 4621 TDBase.getAddress(), CGF.VoidPtrTy) 4622 .getPointer()}; 4623 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4624 std::end(CommonArgs)); 4625 if (isOpenMPTaskLoopDirective(Kind)) { 4626 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), 
KmpTaskTLowerBound); 4627 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4628 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4629 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4630 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4631 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4632 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4633 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4634 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4635 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4636 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4637 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4638 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4639 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4640 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4641 CallArgs.push_back(LBParam); 4642 CallArgs.push_back(UBParam); 4643 CallArgs.push_back(StParam); 4644 CallArgs.push_back(LIParam); 4645 CallArgs.push_back(RParam); 4646 } 4647 CallArgs.push_back(SharedsParam); 4648 4649 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4650 CallArgs); 4651 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4652 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4653 CGF.FinishFunction(); 4654 return TaskEntry; 4655 } 4656 4657 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4658 SourceLocation Loc, 4659 QualType KmpInt32Ty, 4660 QualType KmpTaskTWithPrivatesPtrQTy, 4661 QualType KmpTaskTWithPrivatesQTy) { 4662 ASTContext &C = CGM.getContext(); 4663 FunctionArgList Args; 4664 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4665 ImplicitParamDecl::Other); 4666 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4667 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4668 ImplicitParamDecl::Other); 4669 
Args.push_back(&GtidArg); 4670 Args.push_back(&TaskTypeArg); 4671 const auto &DestructorFnInfo = 4672 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4673 llvm::FunctionType *DestructorFnTy = 4674 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4675 std::string Name = 4676 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4677 auto *DestructorFn = 4678 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4679 Name, &CGM.getModule()); 4680 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4681 DestructorFnInfo); 4682 DestructorFn->setDoesNotRecurse(); 4683 CodeGenFunction CGF(CGM); 4684 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4685 Args, Loc, Loc); 4686 4687 LValue Base = CGF.EmitLoadOfPointerLValue( 4688 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4689 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4690 const auto *KmpTaskTWithPrivatesQTyRD = 4691 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4692 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4693 Base = CGF.EmitLValueForField(Base, *FI); 4694 for (const auto *Field : 4695 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4696 if (QualType::DestructionKind DtorKind = 4697 Field->getType().isDestructedType()) { 4698 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4699 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4700 } 4701 } 4702 CGF.FinishFunction(); 4703 return DestructorFn; 4704 } 4705 4706 /// Emit a privates mapping function for correct handling of private and 4707 /// firstprivate variables. 4708 /// \code 4709 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4710 /// **noalias priv1,..., <tyn> **noalias privn) { 4711 /// *priv1 = &.privates.priv1; 4712 /// ...; 4713 /// *privn = &.privates.privn; 4714 /// } 4715 /// \endcode 4716 static llvm::Value * 4717 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4718 ArrayRef<const Expr *> PrivateVars, 4719 ArrayRef<const Expr *> FirstprivateVars, 4720 ArrayRef<const Expr *> LastprivateVars, 4721 QualType PrivatesQTy, 4722 ArrayRef<PrivateDataTy> Privates) { 4723 ASTContext &C = CGM.getContext(); 4724 FunctionArgList Args; 4725 ImplicitParamDecl TaskPrivatesArg( 4726 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4727 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4728 ImplicitParamDecl::Other); 4729 Args.push_back(&TaskPrivatesArg); 4730 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4731 unsigned Counter = 1; 4732 for (const Expr *E : PrivateVars) { 4733 Args.push_back(ImplicitParamDecl::Create( 4734 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4735 C.getPointerType(C.getPointerType(E->getType())) 4736 .withConst() 4737 .withRestrict(), 4738 ImplicitParamDecl::Other)); 4739 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4740 PrivateVarsPos[VD] = Counter; 4741 ++Counter; 4742 } 4743 for (const Expr *E : FirstprivateVars) { 4744 Args.push_back(ImplicitParamDecl::Create( 4745 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4746 C.getPointerType(C.getPointerType(E->getType())) 4747 .withConst() 4748 .withRestrict(), 4749 ImplicitParamDecl::Other)); 4750 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4751 PrivateVarsPos[VD] = Counter; 4752 ++Counter; 4753 } 4754 for (const Expr *E : LastprivateVars) { 4755 Args.push_back(ImplicitParamDecl::Create( 4756 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4757 C.getPointerType(C.getPointerType(E->getType())) 4758 .withConst() 4759 .withRestrict(), 4760 ImplicitParamDecl::Other)); 4761 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4762 
PrivateVarsPos[VD] = Counter; 4763 ++Counter; 4764 } 4765 const auto &TaskPrivatesMapFnInfo = 4766 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4767 llvm::FunctionType *TaskPrivatesMapTy = 4768 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4769 std::string Name = 4770 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4771 auto *TaskPrivatesMap = llvm::Function::Create( 4772 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4773 &CGM.getModule()); 4774 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4775 TaskPrivatesMapFnInfo); 4776 if (CGM.getLangOpts().Optimize) { 4777 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4778 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4779 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4780 } 4781 CodeGenFunction CGF(CGM); 4782 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4783 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4784 4785 // *privi = &.privates.privi; 4786 LValue Base = CGF.EmitLoadOfPointerLValue( 4787 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4788 TaskPrivatesArg.getType()->castAs<PointerType>()); 4789 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4790 Counter = 0; 4791 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4792 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4793 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4794 LValue RefLVal = 4795 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4796 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4797 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4798 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4799 ++Counter; 4800 } 4801 CGF.FinishFunction(); 4802 return TaskPrivatesMap; 4803 } 4804 4805 /// Emit initialization for private variables in task-based directives. 
4806 static void emitPrivatesInit(CodeGenFunction &CGF, 4807 const OMPExecutableDirective &D, 4808 Address KmpTaskSharedsPtr, LValue TDBase, 4809 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4810 QualType SharedsTy, QualType SharedsPtrTy, 4811 const OMPTaskDataTy &Data, 4812 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4813 ASTContext &C = CGF.getContext(); 4814 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4815 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4816 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4817 ? OMPD_taskloop 4818 : OMPD_task; 4819 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4820 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4821 LValue SrcBase; 4822 bool IsTargetTask = 4823 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4824 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4825 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4826 // PointersArray and SizesArray. The original variables for these arrays are 4827 // not captured and we get their addresses explicitly. 
4828 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4829 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4830 SrcBase = CGF.MakeAddrLValue( 4831 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4832 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4833 SharedsTy); 4834 } 4835 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4836 for (const PrivateDataTy &Pair : Privates) { 4837 const VarDecl *VD = Pair.second.PrivateCopy; 4838 const Expr *Init = VD->getAnyInitializer(); 4839 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4840 !CGF.isTrivialInitializer(Init)))) { 4841 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4842 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4843 const VarDecl *OriginalVD = Pair.second.Original; 4844 // Check if the variable is the target-based BasePointersArray, 4845 // PointersArray or SizesArray. 4846 LValue SharedRefLValue; 4847 QualType Type = PrivateLValue.getType(); 4848 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4849 if (IsTargetTask && !SharedField) { 4850 assert(isa<ImplicitParamDecl>(OriginalVD) && 4851 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4852 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4853 ->getNumParams() == 0 && 4854 isa<TranslationUnitDecl>( 4855 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4856 ->getDeclContext()) && 4857 "Expected artificial target data variable."); 4858 SharedRefLValue = 4859 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4860 } else { 4861 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4862 SharedRefLValue = CGF.MakeAddrLValue( 4863 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 4864 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4865 SharedRefLValue.getTBAAInfo()); 4866 } 4867 if (Type->isArrayType()) { 4868 // Initialize firstprivate array. 
4869 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4870 // Perform simple memcpy. 4871 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4872 } else { 4873 // Initialize firstprivate array using element-by-element 4874 // initialization. 4875 CGF.EmitOMPAggregateAssign( 4876 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 4877 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4878 Address SrcElement) { 4879 // Clean up any temporaries needed by the initialization. 4880 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4881 InitScope.addPrivate( 4882 Elem, [SrcElement]() -> Address { return SrcElement; }); 4883 (void)InitScope.Privatize(); 4884 // Emit initialization for single element. 4885 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4886 CGF, &CapturesInfo); 4887 CGF.EmitAnyExprToMem(Init, DestElement, 4888 Init->getType().getQualifiers(), 4889 /*IsInitializer=*/false); 4890 }); 4891 } 4892 } else { 4893 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4894 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 4895 return SharedRefLValue.getAddress(); 4896 }); 4897 (void)InitScope.Privatize(); 4898 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4899 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4900 /*capturedByInit=*/false); 4901 } 4902 } else { 4903 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4904 } 4905 } 4906 ++FI; 4907 } 4908 } 4909 4910 /// Check if duplication function is required for taskloops. 
4911 static bool checkInitIsRequired(CodeGenFunction &CGF, 4912 ArrayRef<PrivateDataTy> Privates) { 4913 bool InitRequired = false; 4914 for (const PrivateDataTy &Pair : Privates) { 4915 const VarDecl *VD = Pair.second.PrivateCopy; 4916 const Expr *Init = VD->getAnyInitializer(); 4917 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4918 !CGF.isTrivialInitializer(Init)); 4919 if (InitRequired) 4920 break; 4921 } 4922 return InitRequired; 4923 } 4924 4925 4926 /// Emit task_dup function (for initialization of 4927 /// private/firstprivate/lastprivate vars and last_iter flag) 4928 /// \code 4929 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4930 /// lastpriv) { 4931 /// // setup lastprivate flag 4932 /// task_dst->last = lastpriv; 4933 /// // could be constructor calls here... 4934 /// } 4935 /// \endcode 4936 static llvm::Value * 4937 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4938 const OMPExecutableDirective &D, 4939 QualType KmpTaskTWithPrivatesPtrQTy, 4940 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4941 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4942 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4943 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4944 ASTContext &C = CGM.getContext(); 4945 FunctionArgList Args; 4946 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4947 KmpTaskTWithPrivatesPtrQTy, 4948 ImplicitParamDecl::Other); 4949 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4950 KmpTaskTWithPrivatesPtrQTy, 4951 ImplicitParamDecl::Other); 4952 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4953 ImplicitParamDecl::Other); 4954 Args.push_back(&DstArg); 4955 Args.push_back(&SrcArg); 4956 Args.push_back(&LastprivArg); 4957 const auto &TaskDupFnInfo = 4958 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4959 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4960 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4961 auto *TaskDup = llvm::Function::Create( 4962 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4963 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4964 TaskDup->setDoesNotRecurse(); 4965 CodeGenFunction CGF(CGM); 4966 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4967 Loc); 4968 4969 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4970 CGF.GetAddrOfLocalVar(&DstArg), 4971 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4972 // task_dst->liter = lastpriv; 4973 if (WithLastIter) { 4974 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4975 LValue Base = CGF.EmitLValueForField( 4976 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4977 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4978 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4979 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4980 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4981 } 4982 4983 // Emit initial values for private copies (if any). 4984 assert(!Privates.empty()); 4985 Address KmpTaskSharedsPtr = Address::invalid(); 4986 if (!Data.FirstprivateVars.empty()) { 4987 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4988 CGF.GetAddrOfLocalVar(&SrcArg), 4989 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4990 LValue Base = CGF.EmitLValueForField( 4991 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4992 KmpTaskSharedsPtr = Address( 4993 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4994 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4995 KmpTaskTShareds)), 4996 Loc), 4997 CGF.getNaturalTypeAlignment(SharedsTy)); 4998 } 4999 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 5000 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 5001 CGF.FinishFunction(); 5002 return TaskDup; 5003 } 5004 5005 /// Checks if destructor function is required to be generated. 
5006 /// \return true if cleanups are required, false otherwise. 5007 static bool 5008 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 5009 bool NeedsCleanup = false; 5010 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 5011 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 5012 for (const FieldDecl *FD : PrivateRD->fields()) { 5013 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 5014 if (NeedsCleanup) 5015 break; 5016 } 5017 return NeedsCleanup; 5018 } 5019 5020 CGOpenMPRuntime::TaskResultTy 5021 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 5022 const OMPExecutableDirective &D, 5023 llvm::Function *TaskFunction, QualType SharedsTy, 5024 Address Shareds, const OMPTaskDataTy &Data) { 5025 ASTContext &C = CGM.getContext(); 5026 llvm::SmallVector<PrivateDataTy, 4> Privates; 5027 // Aggregate privates and sort them by the alignment. 5028 auto I = Data.PrivateCopies.begin(); 5029 for (const Expr *E : Data.PrivateVars) { 5030 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5031 Privates.emplace_back( 5032 C.getDeclAlign(VD), 5033 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5034 /*PrivateElemInit=*/nullptr)); 5035 ++I; 5036 } 5037 I = Data.FirstprivateCopies.begin(); 5038 auto IElemInitRef = Data.FirstprivateInits.begin(); 5039 for (const Expr *E : Data.FirstprivateVars) { 5040 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5041 Privates.emplace_back( 5042 C.getDeclAlign(VD), 5043 PrivateHelpersTy( 5044 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5045 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 5046 ++I; 5047 ++IElemInitRef; 5048 } 5049 I = Data.LastprivateCopies.begin(); 5050 for (const Expr *E : Data.LastprivateVars) { 5051 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5052 Privates.emplace_back( 5053 C.getDeclAlign(VD), 5054 PrivateHelpersTy(VD, 
cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5055 /*PrivateElemInit=*/nullptr)); 5056 ++I; 5057 } 5058 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 5059 return L.first > R.first; 5060 }); 5061 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 5062 // Build type kmp_routine_entry_t (if not built yet). 5063 emitKmpRoutineEntryT(KmpInt32Ty); 5064 // Build type kmp_task_t (if not built yet). 5065 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 5066 if (SavedKmpTaskloopTQTy.isNull()) { 5067 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5068 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5069 } 5070 KmpTaskTQTy = SavedKmpTaskloopTQTy; 5071 } else { 5072 assert((D.getDirectiveKind() == OMPD_task || 5073 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 5074 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 5075 "Expected taskloop, task or target directive"); 5076 if (SavedKmpTaskTQTy.isNull()) { 5077 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5078 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5079 } 5080 KmpTaskTQTy = SavedKmpTaskTQTy; 5081 } 5082 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5083 // Build particular struct kmp_task_t for the given task. 
5084 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5085 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5086 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5087 QualType KmpTaskTWithPrivatesPtrQTy = 5088 C.getPointerType(KmpTaskTWithPrivatesQTy); 5089 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5090 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5091 KmpTaskTWithPrivatesTy->getPointerTo(); 5092 llvm::Value *KmpTaskTWithPrivatesTySize = 5093 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5094 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5095 5096 // Emit initial values for private copies (if any). 5097 llvm::Value *TaskPrivatesMap = nullptr; 5098 llvm::Type *TaskPrivatesMapTy = 5099 std::next(TaskFunction->arg_begin(), 3)->getType(); 5100 if (!Privates.empty()) { 5101 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5102 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5103 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5104 FI->getType(), Privates); 5105 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5106 TaskPrivatesMap, TaskPrivatesMapTy); 5107 } else { 5108 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5109 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5110 } 5111 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5112 // kmp_task_t *tt); 5113 llvm::Function *TaskEntry = emitProxyTaskFunction( 5114 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5115 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5116 TaskPrivatesMap); 5117 5118 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5119 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5120 // kmp_routine_entry_t *task_entry); 5121 // Task flags. 
Format is taken from 5122 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5123 // description of kmp_tasking_flags struct. 5124 enum { 5125 TiedFlag = 0x1, 5126 FinalFlag = 0x2, 5127 DestructorsFlag = 0x8, 5128 PriorityFlag = 0x20 5129 }; 5130 unsigned Flags = Data.Tied ? TiedFlag : 0; 5131 bool NeedsCleanup = false; 5132 if (!Privates.empty()) { 5133 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5134 if (NeedsCleanup) 5135 Flags = Flags | DestructorsFlag; 5136 } 5137 if (Data.Priority.getInt()) 5138 Flags = Flags | PriorityFlag; 5139 llvm::Value *TaskFlags = 5140 Data.Final.getPointer() 5141 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 5142 CGF.Builder.getInt32(FinalFlag), 5143 CGF.Builder.getInt32(/*C=*/0)) 5144 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5145 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5146 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5147 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 5148 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 5149 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5150 TaskEntry, KmpRoutineEntryPtrTy)}; 5151 llvm::Value *NewTask; 5152 if (D.hasClausesOfKind<OMPNowaitClause>()) { 5153 // Check if we have any device clause associated with the directive. 5154 const Expr *Device = nullptr; 5155 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 5156 Device = C->getDevice(); 5157 // Emit device ID if any otherwise use default value. 
5158 llvm::Value *DeviceID; 5159 if (Device) 5160 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5161 CGF.Int64Ty, /*isSigned=*/true); 5162 else 5163 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 5164 AllocArgs.push_back(DeviceID); 5165 NewTask = CGF.EmitRuntimeCall( 5166 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); 5167 } else { 5168 NewTask = CGF.EmitRuntimeCall( 5169 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5170 } 5171 llvm::Value *NewTaskNewTaskTTy = 5172 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5173 NewTask, KmpTaskTWithPrivatesPtrTy); 5174 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5175 KmpTaskTWithPrivatesQTy); 5176 LValue TDBase = 5177 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5178 // Fill the data in the resulting kmp_task_t record. 5179 // Copy shareds if there are any. 5180 Address KmpTaskSharedsPtr = Address::invalid(); 5181 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5182 KmpTaskSharedsPtr = 5183 Address(CGF.EmitLoadOfScalar( 5184 CGF.EmitLValueForField( 5185 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5186 KmpTaskTShareds)), 5187 Loc), 5188 CGF.getNaturalTypeAlignment(SharedsTy)); 5189 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5190 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5191 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5192 } 5193 // Emit initial values for private copies (if any). 
5194 TaskResultTy Result; 5195 if (!Privates.empty()) { 5196 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5197 SharedsTy, SharedsPtrTy, Data, Privates, 5198 /*ForDup=*/false); 5199 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5200 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5201 Result.TaskDupFn = emitTaskDupFunction( 5202 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5203 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5204 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5205 } 5206 } 5207 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5208 enum { Priority = 0, Destructors = 1 }; 5209 // Provide pointer to function with destructors for privates. 5210 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5211 const RecordDecl *KmpCmplrdataUD = 5212 (*FI)->getType()->getAsUnionType()->getDecl(); 5213 if (NeedsCleanup) { 5214 llvm::Value *DestructorFn = emitDestructorsFunction( 5215 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5216 KmpTaskTWithPrivatesQTy); 5217 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5218 LValue DestructorsLV = CGF.EmitLValueForField( 5219 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5220 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5221 DestructorFn, KmpRoutineEntryPtrTy), 5222 DestructorsLV); 5223 } 5224 // Set priority. 
5225 if (Data.Priority.getInt()) { 5226 LValue Data2LV = CGF.EmitLValueForField( 5227 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5228 LValue PriorityLV = CGF.EmitLValueForField( 5229 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5230 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5231 } 5232 Result.NewTask = NewTask; 5233 Result.TaskEntry = TaskEntry; 5234 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5235 Result.TDBase = TDBase; 5236 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5237 return Result; 5238 } 5239 5240 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5241 const OMPExecutableDirective &D, 5242 llvm::Function *TaskFunction, 5243 QualType SharedsTy, Address Shareds, 5244 const Expr *IfCond, 5245 const OMPTaskDataTy &Data) { 5246 if (!CGF.HaveInsertPoint()) 5247 return; 5248 5249 TaskResultTy Result = 5250 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5251 llvm::Value *NewTask = Result.NewTask; 5252 llvm::Function *TaskEntry = Result.TaskEntry; 5253 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5254 LValue TDBase = Result.TDBase; 5255 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5256 ASTContext &C = CGM.getContext(); 5257 // Process list of dependences. 5258 Address DependenciesArray = Address::invalid(); 5259 unsigned NumDependencies = Data.Dependences.size(); 5260 if (NumDependencies) { 5261 // Dependence kind for RTL. 
5262 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5263 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5264 RecordDecl *KmpDependInfoRD; 5265 QualType FlagsTy = 5266 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5267 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5268 if (KmpDependInfoTy.isNull()) { 5269 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5270 KmpDependInfoRD->startDefinition(); 5271 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5272 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5273 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5274 KmpDependInfoRD->completeDefinition(); 5275 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5276 } else { 5277 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5278 } 5279 // Define type kmp_depend_info[<Dependences.size()>]; 5280 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5281 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5282 ArrayType::Normal, /*IndexTypeQuals=*/0); 5283 // kmp_depend_info[<Dependences.size()>] deps; 5284 DependenciesArray = 5285 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5286 for (unsigned I = 0; I < NumDependencies; ++I) { 5287 const Expr *E = Data.Dependences[I].second; 5288 LValue Addr = CGF.EmitLValue(E); 5289 llvm::Value *Size; 5290 QualType Ty = E->getType(); 5291 if (const auto *ASE = 5292 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5293 LValue UpAddrLVal = 5294 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 5295 llvm::Value *UpAddr = 5296 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 5297 llvm::Value *LowIntPtr = 5298 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 5299 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5300 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5301 } else { 5302 Size = CGF.getTypeSize(Ty); 5303 
} 5304 LValue Base = CGF.MakeAddrLValue( 5305 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5306 KmpDependInfoTy); 5307 // deps[i].base_addr = &<Dependences[i].second>; 5308 LValue BaseAddrLVal = CGF.EmitLValueForField( 5309 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5310 CGF.EmitStoreOfScalar( 5311 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 5312 BaseAddrLVal); 5313 // deps[i].len = sizeof(<Dependences[i].second>); 5314 LValue LenLVal = CGF.EmitLValueForField( 5315 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5316 CGF.EmitStoreOfScalar(Size, LenLVal); 5317 // deps[i].flags = <Dependences[i].first>; 5318 RTLDependenceKindTy DepKind; 5319 switch (Data.Dependences[I].first) { 5320 case OMPC_DEPEND_in: 5321 DepKind = DepIn; 5322 break; 5323 // Out and InOut dependencies must use the same code. 5324 case OMPC_DEPEND_out: 5325 case OMPC_DEPEND_inout: 5326 DepKind = DepInOut; 5327 break; 5328 case OMPC_DEPEND_mutexinoutset: 5329 DepKind = DepMutexInOutSet; 5330 break; 5331 case OMPC_DEPEND_source: 5332 case OMPC_DEPEND_sink: 5333 case OMPC_DEPEND_unknown: 5334 llvm_unreachable("Unknown task dependence type"); 5335 } 5336 LValue FlagsLVal = CGF.EmitLValueForField( 5337 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5338 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5339 FlagsLVal); 5340 } 5341 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5342 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); 5343 } 5344 5345 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5346 // libcall. 
5347 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5348 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5349 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5350 // list is not empty 5351 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5352 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5353 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5354 llvm::Value *DepTaskArgs[7]; 5355 if (NumDependencies) { 5356 DepTaskArgs[0] = UpLoc; 5357 DepTaskArgs[1] = ThreadID; 5358 DepTaskArgs[2] = NewTask; 5359 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 5360 DepTaskArgs[4] = DependenciesArray.getPointer(); 5361 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5362 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5363 } 5364 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 5365 &TaskArgs, 5366 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5367 if (!Data.Tied) { 5368 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5369 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5370 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5371 } 5372 if (NumDependencies) { 5373 CGF.EmitRuntimeCall( 5374 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5375 } else { 5376 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5377 TaskArgs); 5378 } 5379 // Check if parent region is untied and build return for untied task; 5380 if (auto *Region = 5381 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5382 Region->emitUntiedSwitch(CGF); 5383 }; 5384 5385 llvm::Value *DepWaitTaskArgs[6]; 5386 if (NumDependencies) { 5387 DepWaitTaskArgs[0] = UpLoc; 5388 DepWaitTaskArgs[1] = ThreadID; 5389 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 5390 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5391 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5392 
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5393 } 5394 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5395 NumDependencies, &DepWaitTaskArgs, 5396 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5397 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5398 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5399 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5400 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5401 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5402 // is specified. 5403 if (NumDependencies) 5404 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5405 DepWaitTaskArgs); 5406 // Call proxy_task_entry(gtid, new_task); 5407 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5408 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5409 Action.Enter(CGF); 5410 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5411 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5412 OutlinedFnArgs); 5413 }; 5414 5415 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5416 // kmp_task_t *new_task); 5417 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5418 // kmp_task_t *new_task); 5419 RegionCodeGenTy RCG(CodeGen); 5420 CommonActionTy Action( 5421 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5422 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5423 RCG.setAction(Action); 5424 RCG(CGF); 5425 }; 5426 5427 if (IfCond) { 5428 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5429 } else { 5430 RegionCodeGenTy ThenRCG(ThenCodeGen); 5431 ThenRCG(CGF); 5432 } 5433 } 5434 5435 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5436 const OMPLoopDirective &D, 5437 llvm::Function *TaskFunction, 5438 QualType SharedsTy, Address Shareds, 5439 const Expr *IfCond, 5440 const OMPTaskDataTy &Data) 
{ 5441 if (!CGF.HaveInsertPoint()) 5442 return; 5443 TaskResultTy Result = 5444 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5445 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5446 // libcall. 5447 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5448 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5449 // sched, kmp_uint64 grainsize, void *task_dup); 5450 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5451 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5452 llvm::Value *IfVal; 5453 if (IfCond) { 5454 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5455 /*isSigned=*/true); 5456 } else { 5457 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5458 } 5459 5460 LValue LBLVal = CGF.EmitLValueForField( 5461 Result.TDBase, 5462 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5463 const auto *LBVar = 5464 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5465 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 5466 /*IsInitializer=*/true); 5467 LValue UBLVal = CGF.EmitLValueForField( 5468 Result.TDBase, 5469 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5470 const auto *UBVar = 5471 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5472 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 5473 /*IsInitializer=*/true); 5474 LValue StLVal = CGF.EmitLValueForField( 5475 Result.TDBase, 5476 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5477 const auto *StVar = 5478 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5479 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 5480 /*IsInitializer=*/true); 5481 // Store reductions address. 
5482 LValue RedLVal = CGF.EmitLValueForField( 5483 Result.TDBase, 5484 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5485 if (Data.Reductions) { 5486 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5487 } else { 5488 CGF.EmitNullInitialization(RedLVal.getAddress(), 5489 CGF.getContext().VoidPtrTy); 5490 } 5491 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5492 llvm::Value *TaskArgs[] = { 5493 UpLoc, 5494 ThreadID, 5495 Result.NewTask, 5496 IfVal, 5497 LBLVal.getPointer(), 5498 UBLVal.getPointer(), 5499 CGF.EmitLoadOfScalar(StLVal, Loc), 5500 llvm::ConstantInt::getSigned( 5501 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5502 llvm::ConstantInt::getSigned( 5503 CGF.IntTy, Data.Schedule.getPointer() 5504 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5505 : NoSchedule), 5506 Data.Schedule.getPointer() 5507 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5508 /*isSigned=*/false) 5509 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5510 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5511 Result.TaskDupFn, CGF.VoidPtrTy) 5512 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5513 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 5514 } 5515 5516 /// Emit reduction operation for each element of array (required for 5517 /// array sections) LHS op = RHS. 5518 /// \param Type Type of array. 5519 /// \param LHSVar Variable on the left side of the reduction operation 5520 /// (references element of array in original variable). 5521 /// \param RHSVar Variable on the right side of the reduction operation 5522 /// (references element of array in original variable). 5523 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5524 /// RHSVar. 
5525 static void EmitOMPAggregateReduction( 5526 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5527 const VarDecl *RHSVar, 5528 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5529 const Expr *, const Expr *)> &RedOpGen, 5530 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5531 const Expr *UpExpr = nullptr) { 5532 // Perform element-by-element initialization. 5533 QualType ElementTy; 5534 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5535 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5536 5537 // Drill down to the base element type on both arrays. 5538 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5539 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5540 5541 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5542 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5543 // Cast from pointer to array type to pointer to single element. 5544 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5545 // The basic structure here is a while-do loop. 5546 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5547 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5548 llvm::Value *IsEmpty = 5549 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5550 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5551 5552 // Enter the loop body, making that address the current address. 
5553 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5554 CGF.EmitBlock(BodyBB); 5555 5556 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5557 5558 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5559 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5560 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5561 Address RHSElementCurrent = 5562 Address(RHSElementPHI, 5563 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5564 5565 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5566 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5567 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5568 Address LHSElementCurrent = 5569 Address(LHSElementPHI, 5570 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5571 5572 // Emit copy. 5573 CodeGenFunction::OMPPrivateScope Scope(CGF); 5574 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5575 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5576 Scope.Privatize(); 5577 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5578 Scope.ForceCleanup(); 5579 5580 // Shift the address forward by one element. 5581 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5582 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5583 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5584 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5585 // Check whether we've reached the end. 5586 llvm::Value *Done = 5587 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5588 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5589 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5590 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5591 5592 // Done. 5593 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5594 } 5595 5596 /// Emit reduction combiner. 
If the combiner is a simple expression emit it as 5597 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of 5598 /// UDR combiner function. 5599 static void emitReductionCombiner(CodeGenFunction &CGF, 5600 const Expr *ReductionOp) { 5601 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 5602 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 5603 if (const auto *DRE = 5604 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 5605 if (const auto *DRD = 5606 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { 5607 std::pair<llvm::Function *, llvm::Function *> Reduction = 5608 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 5609 RValue Func = RValue::get(Reduction.first); 5610 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 5611 CGF.EmitIgnoredExpr(ReductionOp); 5612 return; 5613 } 5614 CGF.EmitIgnoredExpr(ReductionOp); 5615 } 5616 5617 llvm::Function *CGOpenMPRuntime::emitReductionFunction( 5618 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, 5619 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 5620 ArrayRef<const Expr *> ReductionOps) { 5621 ASTContext &C = CGM.getContext(); 5622 5623 // void reduction_func(void *LHSArg, void *RHSArg); 5624 FunctionArgList Args; 5625 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5626 ImplicitParamDecl::Other); 5627 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 5628 ImplicitParamDecl::Other); 5629 Args.push_back(&LHSArg); 5630 Args.push_back(&RHSArg); 5631 const auto &CGFI = 5632 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 5633 std::string Name = getName({"omp", "reduction", "reduction_func"}); 5634 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 5635 llvm::GlobalValue::InternalLinkage, Name, 5636 &CGM.getModule()); 5637 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 5638 Fn->setDoesNotRecurse(); 
5639 CodeGenFunction CGF(CGM); 5640 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 5641 5642 // Dst = (void*[n])(LHSArg); 5643 // Src = (void*[n])(RHSArg); 5644 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5645 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 5646 ArgsType), CGF.getPointerAlign()); 5647 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5648 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 5649 ArgsType), CGF.getPointerAlign()); 5650 5651 // ... 5652 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 5653 // ... 5654 CodeGenFunction::OMPPrivateScope Scope(CGF); 5655 auto IPriv = Privates.begin(); 5656 unsigned Idx = 0; 5657 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 5658 const auto *RHSVar = 5659 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 5660 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 5661 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 5662 }); 5663 const auto *LHSVar = 5664 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 5665 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 5666 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 5667 }); 5668 QualType PrivTy = (*IPriv)->getType(); 5669 if (PrivTy->isVariablyModifiedType()) { 5670 // Get array size and emit VLA type. 
5671 ++Idx; 5672 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 5673 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 5674 const VariableArrayType *VLA = 5675 CGF.getContext().getAsVariableArrayType(PrivTy); 5676 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 5677 CodeGenFunction::OpaqueValueMapping OpaqueMap( 5678 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 5679 CGF.EmitVariablyModifiedType(PrivTy); 5680 } 5681 } 5682 Scope.Privatize(); 5683 IPriv = Privates.begin(); 5684 auto ILHS = LHSExprs.begin(); 5685 auto IRHS = RHSExprs.begin(); 5686 for (const Expr *E : ReductionOps) { 5687 if ((*IPriv)->getType()->isArrayType()) { 5688 // Emit reduction for array section. 5689 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5690 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5691 EmitOMPAggregateReduction( 5692 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 5693 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5694 emitReductionCombiner(CGF, E); 5695 }); 5696 } else { 5697 // Emit reduction for array subscript or single variable. 5698 emitReductionCombiner(CGF, E); 5699 } 5700 ++IPriv; 5701 ++ILHS; 5702 ++IRHS; 5703 } 5704 Scope.ForceCleanup(); 5705 CGF.FinishFunction(); 5706 return Fn; 5707 } 5708 5709 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 5710 const Expr *ReductionOp, 5711 const Expr *PrivateRef, 5712 const DeclRefExpr *LHS, 5713 const DeclRefExpr *RHS) { 5714 if (PrivateRef->getType()->isArrayType()) { 5715 // Emit reduction for array section. 
5716 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 5717 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 5718 EmitOMPAggregateReduction( 5719 CGF, PrivateRef->getType(), LHSVar, RHSVar, 5720 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 5721 emitReductionCombiner(CGF, ReductionOp); 5722 }); 5723 } else { 5724 // Emit reduction for array subscript or single variable. 5725 emitReductionCombiner(CGF, ReductionOp); 5726 } 5727 } 5728 5729 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 5730 ArrayRef<const Expr *> Privates, 5731 ArrayRef<const Expr *> LHSExprs, 5732 ArrayRef<const Expr *> RHSExprs, 5733 ArrayRef<const Expr *> ReductionOps, 5734 ReductionOptionsTy Options) { 5735 if (!CGF.HaveInsertPoint()) 5736 return; 5737 5738 bool WithNowait = Options.WithNowait; 5739 bool SimpleReduction = Options.SimpleReduction; 5740 5741 // Next code should be emitted for reduction: 5742 // 5743 // static kmp_critical_name lock = { 0 }; 5744 // 5745 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 5746 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 5747 // ... 5748 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 5749 // *(Type<n>-1*)rhs[<n>-1]); 5750 // } 5751 // 5752 // ... 5753 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 5754 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5755 // RedList, reduce_func, &<lock>)) { 5756 // case 1: 5757 // ... 5758 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5759 // ... 5760 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5761 // break; 5762 // case 2: 5763 // ... 5764 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5765 // ... 5766 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 5767 // break; 5768 // default:; 5769 // } 5770 // 5771 // if SimpleReduction is true, only the next code is generated: 5772 // ... 
5773 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5774 // ... 5775 5776 ASTContext &C = CGM.getContext(); 5777 5778 if (SimpleReduction) { 5779 CodeGenFunction::RunCleanupsScope Scope(CGF); 5780 auto IPriv = Privates.begin(); 5781 auto ILHS = LHSExprs.begin(); 5782 auto IRHS = RHSExprs.begin(); 5783 for (const Expr *E : ReductionOps) { 5784 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5785 cast<DeclRefExpr>(*IRHS)); 5786 ++IPriv; 5787 ++ILHS; 5788 ++IRHS; 5789 } 5790 return; 5791 } 5792 5793 // 1. Build a list of reduction variables. 5794 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; 5795 auto Size = RHSExprs.size(); 5796 for (const Expr *E : Privates) { 5797 if (E->getType()->isVariablyModifiedType()) 5798 // Reserve place for array size. 5799 ++Size; 5800 } 5801 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); 5802 QualType ReductionArrayTy = 5803 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 5804 /*IndexTypeQuals=*/0); 5805 Address ReductionList = 5806 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); 5807 auto IPriv = Privates.begin(); 5808 unsigned Idx = 0; 5809 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { 5810 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5811 CGF.Builder.CreateStore( 5812 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5813 CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy), 5814 Elem); 5815 if ((*IPriv)->getType()->isVariablyModifiedType()) { 5816 // Store array size. 5817 ++Idx; 5818 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); 5819 llvm::Value *Size = CGF.Builder.CreateIntCast( 5820 CGF.getVLASize( 5821 CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) 5822 .NumElts, 5823 CGF.SizeTy, /*isSigned=*/false); 5824 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), 5825 Elem); 5826 } 5827 } 5828 5829 // 2. Emit reduce_func(). 
5830 llvm::Function *ReductionFn = emitReductionFunction( 5831 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, 5832 LHSExprs, RHSExprs, ReductionOps); 5833 5834 // 3. Create static kmp_critical_name lock = { 0 }; 5835 std::string Name = getName({"reduction"}); 5836 llvm::Value *Lock = getCriticalRegionLock(Name); 5837 5838 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 5839 // RedList, reduce_func, &<lock>); 5840 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); 5841 llvm::Value *ThreadId = getThreadID(CGF, Loc); 5842 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); 5843 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5844 ReductionList.getPointer(), CGF.VoidPtrTy); 5845 llvm::Value *Args[] = { 5846 IdentTLoc, // ident_t *<loc> 5847 ThreadId, // i32 <gtid> 5848 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> 5849 ReductionArrayTySize, // size_type sizeof(RedList) 5850 RL, // void *RedList 5851 ReductionFn, // void (*) (void *, void *) <reduce_func> 5852 Lock // kmp_critical_name *&<lock> 5853 }; 5854 llvm::Value *Res = CGF.EmitRuntimeCall( 5855 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait 5856 : OMPRTL__kmpc_reduce), 5857 Args); 5858 5859 // 5. Build switch(res) 5860 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); 5861 llvm::SwitchInst *SwInst = 5862 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); 5863 5864 // 6. Build case 1: 5865 // ... 5866 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 5867 // ... 
5868 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5869 // break; 5870 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); 5871 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); 5872 CGF.EmitBlock(Case1BB); 5873 5874 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 5875 llvm::Value *EndArgs[] = { 5876 IdentTLoc, // ident_t *<loc> 5877 ThreadId, // i32 <gtid> 5878 Lock // kmp_critical_name *&<lock> 5879 }; 5880 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( 5881 CodeGenFunction &CGF, PrePostActionTy &Action) { 5882 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5883 auto IPriv = Privates.begin(); 5884 auto ILHS = LHSExprs.begin(); 5885 auto IRHS = RHSExprs.begin(); 5886 for (const Expr *E : ReductionOps) { 5887 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), 5888 cast<DeclRefExpr>(*IRHS)); 5889 ++IPriv; 5890 ++ILHS; 5891 ++IRHS; 5892 } 5893 }; 5894 RegionCodeGenTy RCG(CodeGen); 5895 CommonActionTy Action( 5896 nullptr, llvm::None, 5897 createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait 5898 : OMPRTL__kmpc_end_reduce), 5899 EndArgs); 5900 RCG.setAction(Action); 5901 RCG(CGF); 5902 5903 CGF.EmitBranch(DefaultBB); 5904 5905 // 7. Build case 2: 5906 // ... 5907 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 5908 // ... 
5909 // break; 5910 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); 5911 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); 5912 CGF.EmitBlock(Case2BB); 5913 5914 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( 5915 CodeGenFunction &CGF, PrePostActionTy &Action) { 5916 auto ILHS = LHSExprs.begin(); 5917 auto IRHS = RHSExprs.begin(); 5918 auto IPriv = Privates.begin(); 5919 for (const Expr *E : ReductionOps) { 5920 const Expr *XExpr = nullptr; 5921 const Expr *EExpr = nullptr; 5922 const Expr *UpExpr = nullptr; 5923 BinaryOperatorKind BO = BO_Comma; 5924 if (const auto *BO = dyn_cast<BinaryOperator>(E)) { 5925 if (BO->getOpcode() == BO_Assign) { 5926 XExpr = BO->getLHS(); 5927 UpExpr = BO->getRHS(); 5928 } 5929 } 5930 // Try to emit update expression as a simple atomic. 5931 const Expr *RHSExpr = UpExpr; 5932 if (RHSExpr) { 5933 // Analyze RHS part of the whole expression. 5934 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( 5935 RHSExpr->IgnoreParenImpCasts())) { 5936 // If this is a conditional operator, analyze its condition for 5937 // min/max reduction operator. 
5938 RHSExpr = ACO->getCond(); 5939 } 5940 if (const auto *BORHS = 5941 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { 5942 EExpr = BORHS->getRHS(); 5943 BO = BORHS->getOpcode(); 5944 } 5945 } 5946 if (XExpr) { 5947 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5948 auto &&AtomicRedGen = [BO, VD, 5949 Loc](CodeGenFunction &CGF, const Expr *XExpr, 5950 const Expr *EExpr, const Expr *UpExpr) { 5951 LValue X = CGF.EmitLValue(XExpr); 5952 RValue E; 5953 if (EExpr) 5954 E = CGF.EmitAnyExpr(EExpr); 5955 CGF.EmitOMPAtomicSimpleUpdateExpr( 5956 X, E, BO, /*IsXLHSInRHSPart=*/true, 5957 llvm::AtomicOrdering::Monotonic, Loc, 5958 [&CGF, UpExpr, VD, Loc](RValue XRValue) { 5959 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 5960 PrivateScope.addPrivate( 5961 VD, [&CGF, VD, XRValue, Loc]() { 5962 Address LHSTemp = CGF.CreateMemTemp(VD->getType()); 5963 CGF.emitOMPSimpleStore( 5964 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, 5965 VD->getType().getNonReferenceType(), Loc); 5966 return LHSTemp; 5967 }); 5968 (void)PrivateScope.Privatize(); 5969 return CGF.EmitAnyExpr(UpExpr); 5970 }); 5971 }; 5972 if ((*IPriv)->getType()->isArrayType()) { 5973 // Emit atomic reduction for array section. 5974 const auto *RHSVar = 5975 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 5976 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, 5977 AtomicRedGen, XExpr, EExpr, UpExpr); 5978 } else { 5979 // Emit atomic reduction for array subscript or single variable. 5980 AtomicRedGen(CGF, XExpr, EExpr, UpExpr); 5981 } 5982 } else { 5983 // Emit as a critical region. 
5984 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, 5985 const Expr *, const Expr *) { 5986 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5987 std::string Name = RT.getName({"atomic_reduction"}); 5988 RT.emitCriticalRegion( 5989 CGF, Name, 5990 [=](CodeGenFunction &CGF, PrePostActionTy &Action) { 5991 Action.Enter(CGF); 5992 emitReductionCombiner(CGF, E); 5993 }, 5994 Loc); 5995 }; 5996 if ((*IPriv)->getType()->isArrayType()) { 5997 const auto *LHSVar = 5998 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 5999 const auto *RHSVar = 6000 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 6001 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, 6002 CritRedGen); 6003 } else { 6004 CritRedGen(CGF, nullptr, nullptr, nullptr); 6005 } 6006 } 6007 ++ILHS; 6008 ++IRHS; 6009 ++IPriv; 6010 } 6011 }; 6012 RegionCodeGenTy AtomicRCG(AtomicCodeGen); 6013 if (!WithNowait) { 6014 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); 6015 llvm::Value *EndArgs[] = { 6016 IdentTLoc, // ident_t *<loc> 6017 ThreadId, // i32 <gtid> 6018 Lock // kmp_critical_name *&<lock> 6019 }; 6020 CommonActionTy Action(nullptr, llvm::None, 6021 createRuntimeFunction(OMPRTL__kmpc_end_reduce), 6022 EndArgs); 6023 AtomicRCG.setAction(Action); 6024 AtomicRCG(CGF); 6025 } else { 6026 AtomicRCG(CGF); 6027 } 6028 6029 CGF.EmitBranch(DefaultBB); 6030 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); 6031 } 6032 6033 /// Generates unique name for artificial threadprivate variables. 6034 /// Format is: <Prefix> "." 
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 6035 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 6036 const Expr *Ref) { 6037 SmallString<256> Buffer; 6038 llvm::raw_svector_ostream Out(Buffer); 6039 const clang::DeclRefExpr *DE; 6040 const VarDecl *D = ::getBaseDecl(Ref, DE); 6041 if (!D) 6042 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 6043 D = D->getCanonicalDecl(); 6044 std::string Name = CGM.getOpenMPRuntime().getName( 6045 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 6046 Out << Prefix << Name << "_" 6047 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 6048 return Out.str(); 6049 } 6050 6051 /// Emits reduction initializer function: 6052 /// \code 6053 /// void @.red_init(void* %arg) { 6054 /// %0 = bitcast void* %arg to <type>* 6055 /// store <type> <init>, <type>* %0 6056 /// ret void 6057 /// } 6058 /// \endcode 6059 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6060 SourceLocation Loc, 6061 ReductionCodeGen &RCG, unsigned N) { 6062 ASTContext &C = CGM.getContext(); 6063 FunctionArgList Args; 6064 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6065 ImplicitParamDecl::Other); 6066 Args.emplace_back(&Param); 6067 const auto &FnInfo = 6068 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6069 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6070 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6071 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6072 Name, &CGM.getModule()); 6073 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6074 Fn->setDoesNotRecurse(); 6075 CodeGenFunction CGF(CGM); 6076 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6077 Address PrivateAddr = CGF.EmitLoadOfPointer( 6078 CGF.GetAddrOfLocalVar(&Param), 6079 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6080 llvm::Value *Size = nullptr; 6081 
// If the size of the reduction item is non-constant, load it from global 6082 // threadprivate variable. 6083 if (RCG.getSizes(N).second) { 6084 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6085 CGF, CGM.getContext().getSizeType(), 6086 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6087 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6088 CGM.getContext().getSizeType(), Loc); 6089 } 6090 RCG.emitAggregateType(CGF, N, Size); 6091 LValue SharedLVal; 6092 // If initializer uses initializer from declare reduction construct, emit a 6093 // pointer to the address of the original reduction item (reuired by reduction 6094 // initializer) 6095 if (RCG.usesReductionInitializer(N)) { 6096 Address SharedAddr = 6097 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6098 CGF, CGM.getContext().VoidPtrTy, 6099 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6100 SharedAddr = CGF.EmitLoadOfPointer( 6101 SharedAddr, 6102 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 6103 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 6104 } else { 6105 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 6106 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 6107 CGM.getContext().VoidPtrTy); 6108 } 6109 // Emit the initializer: 6110 // %0 = bitcast void* %arg to <type>* 6111 // store <type> <init>, <type>* %0 6112 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 6113 [](CodeGenFunction &) { return false; }); 6114 CGF.FinishFunction(); 6115 return Fn; 6116 } 6117 6118 /// Emits reduction combiner function: 6119 /// \code 6120 /// void @.red_comb(void* %arg0, void* %arg1) { 6121 /// %lhs = bitcast void* %arg0 to <type>* 6122 /// %rhs = bitcast void* %arg1 to <type>* 6123 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6124 /// store <type> %2, <type>* %lhs 6125 /// ret void 6126 /// } 6127 /// \endcode 6128 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 6129 
SourceLocation Loc, 6130 ReductionCodeGen &RCG, unsigned N, 6131 const Expr *ReductionOp, 6132 const Expr *LHS, const Expr *RHS, 6133 const Expr *PrivateRef) { 6134 ASTContext &C = CGM.getContext(); 6135 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6136 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6137 FunctionArgList Args; 6138 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6139 C.VoidPtrTy, ImplicitParamDecl::Other); 6140 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6141 ImplicitParamDecl::Other); 6142 Args.emplace_back(&ParamInOut); 6143 Args.emplace_back(&ParamIn); 6144 const auto &FnInfo = 6145 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6146 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6147 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6148 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6149 Name, &CGM.getModule()); 6150 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6151 Fn->setDoesNotRecurse(); 6152 CodeGenFunction CGF(CGM); 6153 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6154 llvm::Value *Size = nullptr; 6155 // If the size of the reduction item is non-constant, load it from global 6156 // threadprivate variable. 6157 if (RCG.getSizes(N).second) { 6158 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6159 CGF, CGM.getContext().getSizeType(), 6160 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6161 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6162 CGM.getContext().getSizeType(), Loc); 6163 } 6164 RCG.emitAggregateType(CGF, N, Size); 6165 // Remap lhs and rhs variables to the addresses of the function arguments. 
6166 // %lhs = bitcast void* %arg0 to <type>* 6167 // %rhs = bitcast void* %arg1 to <type>* 6168 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6169 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6170 // Pull out the pointer to the variable. 6171 Address PtrAddr = CGF.EmitLoadOfPointer( 6172 CGF.GetAddrOfLocalVar(&ParamInOut), 6173 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6174 return CGF.Builder.CreateElementBitCast( 6175 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6176 }); 6177 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6178 // Pull out the pointer to the variable. 6179 Address PtrAddr = CGF.EmitLoadOfPointer( 6180 CGF.GetAddrOfLocalVar(&ParamIn), 6181 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6182 return CGF.Builder.CreateElementBitCast( 6183 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6184 }); 6185 PrivateScope.Privatize(); 6186 // Emit the combiner body: 6187 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6188 // store <type> %2, <type>* %lhs 6189 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6190 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6191 cast<DeclRefExpr>(RHS)); 6192 CGF.FinishFunction(); 6193 return Fn; 6194 } 6195 6196 /// Emits reduction finalizer function: 6197 /// \code 6198 /// void @.red_fini(void* %arg) { 6199 /// %0 = bitcast void* %arg to <type>* 6200 /// <destroy>(<type>* %0) 6201 /// ret void 6202 /// } 6203 /// \endcode 6204 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6205 SourceLocation Loc, 6206 ReductionCodeGen &RCG, unsigned N) { 6207 if (!RCG.needCleanups(N)) 6208 return nullptr; 6209 ASTContext &C = CGM.getContext(); 6210 FunctionArgList Args; 6211 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6212 ImplicitParamDecl::Other); 6213 Args.emplace_back(&Param); 6214 const auto &FnInfo = 6215 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6216 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 6217 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6218 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6219 Name, &CGM.getModule()); 6220 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6221 Fn->setDoesNotRecurse(); 6222 CodeGenFunction CGF(CGM); 6223 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6224 Address PrivateAddr = CGF.EmitLoadOfPointer( 6225 CGF.GetAddrOfLocalVar(&Param), 6226 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6227 llvm::Value *Size = nullptr; 6228 // If the size of the reduction item is non-constant, load it from global 6229 // threadprivate variable. 6230 if (RCG.getSizes(N).second) { 6231 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6232 CGF, CGM.getContext().getSizeType(), 6233 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6234 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6235 CGM.getContext().getSizeType(), Loc); 6236 } 6237 RCG.emitAggregateType(CGF, N, Size); 6238 // Emit the finalizer body: 6239 // <destroy>(<type>* %0) 6240 RCG.emitCleanups(CGF, N, PrivateAddr); 6241 CGF.FinishFunction(); 6242 return Fn; 6243 } 6244 6245 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6246 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6247 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6248 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6249 return nullptr; 6250 6251 // Build typedef struct: 6252 // kmp_task_red_input { 6253 // void *reduce_shar; // shared reduction item 6254 // size_t reduce_size; // size of data item 6255 // void *reduce_init; // data initialization routine 6256 // void *reduce_fini; // data finalization routine 6257 // void *reduce_comb; // data combiner routine 6258 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6259 // } kmp_task_red_input_t; 6260 
ASTContext &C = CGM.getContext(); 6261 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6262 RD->startDefinition(); 6263 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6264 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6265 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6266 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6267 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6268 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6269 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6270 RD->completeDefinition(); 6271 QualType RDType = C.getRecordType(RD); 6272 unsigned Size = Data.ReductionVars.size(); 6273 llvm::APInt ArraySize(/*numBits=*/64, Size); 6274 QualType ArrayRDType = C.getConstantArrayType( 6275 RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0); 6276 // kmp_task_red_input_t .rd_input.[Size]; 6277 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6278 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6279 Data.ReductionOps); 6280 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6281 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6282 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6283 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6284 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6285 TaskRedInput.getPointer(), Idxs, 6286 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6287 ".rd_input.gep."); 6288 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6289 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6290 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6291 RCG.emitSharedLValue(CGF, Cnt); 6292 llvm::Value *CastedShared = 6293 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 6294 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6295 RCG.emitAggregateType(CGF, Cnt); 6296 llvm::Value *SizeValInChars; 6297 llvm::Value 
*SizeVal; 6298 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6299 // We use delayed creation/initialization for VLAs, array sections and 6300 // custom reduction initializations. It is required because runtime does not 6301 // provide the way to pass the sizes of VLAs/array sections to 6302 // initializer/combiner/finalizer functions and does not pass the pointer to 6303 // original reduction item to the initializer. Instead threadprivate global 6304 // variables are used to store these values and use them in the functions. 6305 bool DelayedCreation = !!SizeVal; 6306 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6307 /*isSigned=*/false); 6308 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6309 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6310 // ElemLVal.reduce_init = init; 6311 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6312 llvm::Value *InitAddr = 6313 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6314 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6315 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 6316 // ElemLVal.reduce_fini = fini; 6317 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6318 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6319 llvm::Value *FiniAddr = Fini 6320 ? 
CGF.EmitCastToVoidPtr(Fini) 6321 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6322 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6323 // ElemLVal.reduce_comb = comb; 6324 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6325 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6326 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6327 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6328 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6329 // ElemLVal.flags = 0; 6330 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6331 if (DelayedCreation) { 6332 CGF.EmitStoreOfScalar( 6333 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6334 FlagsLVal); 6335 } else 6336 CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); 6337 } 6338 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 6339 // *data); 6340 llvm::Value *Args[] = { 6341 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6342 /*isSigned=*/true), 6343 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6344 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6345 CGM.VoidPtrTy)}; 6346 return CGF.EmitRuntimeCall( 6347 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 6348 } 6349 6350 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6351 SourceLocation Loc, 6352 ReductionCodeGen &RCG, 6353 unsigned N) { 6354 auto Sizes = RCG.getSizes(N); 6355 // Emit threadprivate global variable if the type is non-constant 6356 // (Sizes.second = nullptr). 
6357 if (Sizes.second) { 6358 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6359 /*isSigned=*/false); 6360 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6361 CGF, CGM.getContext().getSizeType(), 6362 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6363 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6364 } 6365 // Store address of the original reduction item if custom initializer is used. 6366 if (RCG.usesReductionInitializer(N)) { 6367 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6368 CGF, CGM.getContext().VoidPtrTy, 6369 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6370 CGF.Builder.CreateStore( 6371 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6372 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 6373 SharedAddr, /*IsVolatile=*/false); 6374 } 6375 } 6376 6377 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6378 SourceLocation Loc, 6379 llvm::Value *ReductionsPtr, 6380 LValue SharedLVal) { 6381 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6382 // *d); 6383 llvm::Value *Args[] = { 6384 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6385 /*isSigned=*/true), 6386 ReductionsPtr, 6387 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 6388 CGM.VoidPtrTy)}; 6389 return Address( 6390 CGF.EmitRuntimeCall( 6391 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6392 SharedLVal.getAlignment()); 6393 } 6394 6395 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6396 SourceLocation Loc) { 6397 if (!CGF.HaveInsertPoint()) 6398 return; 6399 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6400 // global_tid); 6401 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6402 // Ignore return result until untied tasks are supported. 
6403 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6404 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6405 Region->emitUntiedSwitch(CGF); 6406 } 6407 6408 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6409 OpenMPDirectiveKind InnerKind, 6410 const RegionCodeGenTy &CodeGen, 6411 bool HasCancel) { 6412 if (!CGF.HaveInsertPoint()) 6413 return; 6414 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6415 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6416 } 6417 6418 namespace { 6419 enum RTCancelKind { 6420 CancelNoreq = 0, 6421 CancelParallel = 1, 6422 CancelLoop = 2, 6423 CancelSections = 3, 6424 CancelTaskgroup = 4 6425 }; 6426 } // anonymous namespace 6427 6428 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6429 RTCancelKind CancelKind = CancelNoreq; 6430 if (CancelRegion == OMPD_parallel) 6431 CancelKind = CancelParallel; 6432 else if (CancelRegion == OMPD_for) 6433 CancelKind = CancelLoop; 6434 else if (CancelRegion == OMPD_sections) 6435 CancelKind = CancelSections; 6436 else { 6437 assert(CancelRegion == OMPD_taskgroup); 6438 CancelKind = CancelTaskgroup; 6439 } 6440 return CancelKind; 6441 } 6442 6443 void CGOpenMPRuntime::emitCancellationPointCall( 6444 CodeGenFunction &CGF, SourceLocation Loc, 6445 OpenMPDirectiveKind CancelRegion) { 6446 if (!CGF.HaveInsertPoint()) 6447 return; 6448 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6449 // global_tid, kmp_int32 cncl_kind); 6450 if (auto *OMPRegionInfo = 6451 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6452 // For 'cancellation point taskgroup', the task region info may not have a 6453 // cancel. This may instead happen in another adjacent task. 
6454 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6455 llvm::Value *Args[] = { 6456 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6457 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6458 // Ignore return result until untied tasks are supported. 6459 llvm::Value *Result = CGF.EmitRuntimeCall( 6460 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6461 // if (__kmpc_cancellationpoint()) { 6462 // exit from construct; 6463 // } 6464 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6465 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6466 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6467 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6468 CGF.EmitBlock(ExitBB); 6469 // exit from construct; 6470 CodeGenFunction::JumpDest CancelDest = 6471 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6472 CGF.EmitBranchThroughCleanup(CancelDest); 6473 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6474 } 6475 } 6476 } 6477 6478 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6479 const Expr *IfCond, 6480 OpenMPDirectiveKind CancelRegion) { 6481 if (!CGF.HaveInsertPoint()) 6482 return; 6483 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6484 // kmp_int32 cncl_kind); 6485 if (auto *OMPRegionInfo = 6486 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6487 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6488 PrePostActionTy &) { 6489 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6490 llvm::Value *Args[] = { 6491 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6492 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6493 // Ignore return result until untied tasks are supported. 
6494 llvm::Value *Result = CGF.EmitRuntimeCall( 6495 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6496 // if (__kmpc_cancel()) { 6497 // exit from construct; 6498 // } 6499 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6500 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6501 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6502 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6503 CGF.EmitBlock(ExitBB); 6504 // exit from construct; 6505 CodeGenFunction::JumpDest CancelDest = 6506 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6507 CGF.EmitBranchThroughCleanup(CancelDest); 6508 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6509 }; 6510 if (IfCond) { 6511 emitOMPIfClause(CGF, IfCond, ThenGen, 6512 [](CodeGenFunction &, PrePostActionTy &) {}); 6513 } else { 6514 RegionCodeGenTy ThenRCG(ThenGen); 6515 ThenRCG(CGF); 6516 } 6517 } 6518 } 6519 6520 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6521 const OMPExecutableDirective &D, StringRef ParentName, 6522 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6523 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6524 assert(!ParentName.empty() && "Invalid target region parent name!"); 6525 HasEmittedTargetRegion = true; 6526 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6527 IsOffloadEntry, CodeGen); 6528 } 6529 6530 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6531 const OMPExecutableDirective &D, StringRef ParentName, 6532 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6533 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6534 // Create a unique name for the entry function using the source location 6535 // information of the current target region. 
The name will be something like: 6536 // 6537 // __omp_offloading_DD_FFFF_PP_lBB 6538 // 6539 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6540 // mangled name of the function that encloses the target region and BB is the 6541 // line number of the target region. 6542 6543 unsigned DeviceID; 6544 unsigned FileID; 6545 unsigned Line; 6546 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6547 Line); 6548 SmallString<64> EntryFnName; 6549 { 6550 llvm::raw_svector_ostream OS(EntryFnName); 6551 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6552 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6553 } 6554 6555 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6556 6557 CodeGenFunction CGF(CGM, true); 6558 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6559 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6560 6561 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 6562 6563 // If this target outline function is not an offload entry, we don't need to 6564 // register it. 6565 if (!IsOffloadEntry) 6566 return; 6567 6568 // The target region ID is used by the runtime library to identify the current 6569 // target region, so it only has to be unique and not necessarily point to 6570 // anything. It could be the pointer to the outlined function that implements 6571 // the target region, but we aren't using that so that the compiler doesn't 6572 // need to keep that, and could therefore inline the host function if proven 6573 // worthwhile during optimization. In the other hand, if emitting code for the 6574 // device, the ID has to be the function address so that it can retrieved from 6575 // the offloading entry and launched by the runtime library. We also mark the 6576 // outlined function to have external linkage in case we are emitting code for 6577 // the device, because these functions will be entry points to the device. 
6578 6579 if (CGM.getLangOpts().OpenMPIsDevice) { 6580 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6581 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6582 OutlinedFn->setDSOLocal(false); 6583 } else { 6584 std::string Name = getName({EntryFnName, "region_id"}); 6585 OutlinedFnID = new llvm::GlobalVariable( 6586 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6587 llvm::GlobalValue::WeakAnyLinkage, 6588 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6589 } 6590 6591 // Register the information for the entry associated with this target region. 6592 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6593 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6594 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6595 } 6596 6597 /// Checks if the expression is constant or does not have non-trivial function 6598 /// calls. 6599 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6600 // We can skip constant expressions. 6601 // We can skip expressions with trivial calls or simple expressions. 6602 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6603 !E->hasNonTrivialCall(Ctx)) && 6604 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6605 } 6606 6607 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6608 const Stmt *Body) { 6609 const Stmt *Child = Body->IgnoreContainers(); 6610 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6611 Child = nullptr; 6612 for (const Stmt *S : C->body()) { 6613 if (const auto *E = dyn_cast<Expr>(S)) { 6614 if (isTrivial(Ctx, E)) 6615 continue; 6616 } 6617 // Some of the statements can be ignored. 6618 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6619 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6620 continue; 6621 // Analyze declarations. 
6622 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6623 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6624 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6625 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6626 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6627 isa<UsingDirectiveDecl>(D) || 6628 isa<OMPDeclareReductionDecl>(D) || 6629 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6630 return true; 6631 const auto *VD = dyn_cast<VarDecl>(D); 6632 if (!VD) 6633 return false; 6634 return VD->isConstexpr() || 6635 ((VD->getType().isTrivialType(Ctx) || 6636 VD->getType()->isReferenceType()) && 6637 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6638 })) 6639 continue; 6640 } 6641 // Found multiple children - cannot get the one child only. 6642 if (Child) 6643 return nullptr; 6644 Child = S; 6645 } 6646 if (Child) 6647 Child = Child->IgnoreContainers(); 6648 } 6649 return Child; 6650 } 6651 6652 /// Emit the number of teams for a target directive. Inspect the num_teams 6653 /// clause associated with a teams construct combined or closely nested 6654 /// with the target directive. 6655 /// 6656 /// Emit a team of size one for directives such as 'target parallel' that 6657 /// have no associated teams construct. 6658 /// 6659 /// Otherwise, return nullptr. 
6660 static llvm::Value * 6661 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6662 const OMPExecutableDirective &D) { 6663 assert(!CGF.getLangOpts().OpenMPIsDevice && 6664 "Clauses associated with the teams directive expected to be emitted " 6665 "only for the host!"); 6666 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6667 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6668 "Expected target-based executable directive."); 6669 CGBuilderTy &Bld = CGF.Builder; 6670 switch (DirectiveKind) { 6671 case OMPD_target: { 6672 const auto *CS = D.getInnermostCapturedStmt(); 6673 const auto *Body = 6674 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6675 const Stmt *ChildStmt = 6676 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6677 if (const auto *NestedDir = 6678 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6679 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6680 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6681 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6682 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6683 const Expr *NumTeams = 6684 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6685 llvm::Value *NumTeamsVal = 6686 CGF.EmitScalarExpr(NumTeams, 6687 /*IgnoreResultAssign*/ true); 6688 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6689 /*isSigned=*/true); 6690 } 6691 return Bld.getInt32(0); 6692 } 6693 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6694 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6695 return Bld.getInt32(1); 6696 return Bld.getInt32(0); 6697 } 6698 return nullptr; 6699 } 6700 case OMPD_target_teams: 6701 case OMPD_target_teams_distribute: 6702 case OMPD_target_teams_distribute_simd: 6703 case OMPD_target_teams_distribute_parallel_for: 6704 case OMPD_target_teams_distribute_parallel_for_simd: { 6705 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6706 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6707 const Expr *NumTeams = 6708 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6709 llvm::Value *NumTeamsVal = 6710 CGF.EmitScalarExpr(NumTeams, 6711 /*IgnoreResultAssign*/ true); 6712 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6713 /*isSigned=*/true); 6714 } 6715 return Bld.getInt32(0); 6716 } 6717 case OMPD_target_parallel: 6718 case OMPD_target_parallel_for: 6719 case OMPD_target_parallel_for_simd: 6720 case OMPD_target_simd: 6721 return Bld.getInt32(1); 6722 case OMPD_parallel: 6723 case OMPD_for: 6724 case OMPD_parallel_for: 6725 case OMPD_parallel_sections: 6726 case OMPD_for_simd: 6727 case OMPD_parallel_for_simd: 6728 case OMPD_cancel: 6729 case OMPD_cancellation_point: 6730 case OMPD_ordered: 6731 case OMPD_threadprivate: 6732 case OMPD_allocate: 6733 case OMPD_task: 6734 case OMPD_simd: 6735 case OMPD_sections: 6736 case OMPD_section: 6737 case OMPD_single: 6738 case OMPD_master: 6739 case OMPD_critical: 6740 case OMPD_taskyield: 6741 case OMPD_barrier: 6742 case OMPD_taskwait: 6743 case OMPD_taskgroup: 6744 case OMPD_atomic: 6745 case OMPD_flush: 6746 case OMPD_teams: 6747 case OMPD_target_data: 6748 case OMPD_target_exit_data: 6749 case OMPD_target_enter_data: 6750 case OMPD_distribute: 6751 case OMPD_distribute_simd: 6752 case OMPD_distribute_parallel_for: 6753 case OMPD_distribute_parallel_for_simd: 6754 case OMPD_teams_distribute: 6755 case OMPD_teams_distribute_simd: 6756 case OMPD_teams_distribute_parallel_for: 6757 case OMPD_teams_distribute_parallel_for_simd: 6758 case OMPD_target_update: 6759 case OMPD_declare_simd: 6760 case OMPD_declare_target: 6761 case OMPD_end_declare_target: 6762 case OMPD_declare_reduction: 6763 case OMPD_declare_mapper: 6764 case OMPD_taskloop: 6765 case OMPD_taskloop_simd: 6766 case OMPD_requires: 6767 case OMPD_unknown: 6768 break; 6769 } 6770 llvm_unreachable("Unexpected directive kind."); 6771 } 6772 6773 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt 
*CS, 6774 llvm::Value *DefaultThreadLimitVal) { 6775 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6776 CGF.getContext(), CS->getCapturedStmt()); 6777 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6778 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6779 llvm::Value *NumThreads = nullptr; 6780 llvm::Value *CondVal = nullptr; 6781 // Handle if clause. If if clause present, the number of threads is 6782 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6783 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6784 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6785 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6786 const OMPIfClause *IfClause = nullptr; 6787 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6788 if (C->getNameModifier() == OMPD_unknown || 6789 C->getNameModifier() == OMPD_parallel) { 6790 IfClause = C; 6791 break; 6792 } 6793 } 6794 if (IfClause) { 6795 const Expr *Cond = IfClause->getCondition(); 6796 bool Result; 6797 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6798 if (!Result) 6799 return CGF.Builder.getInt32(1); 6800 } else { 6801 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6802 if (const auto *PreInit = 6803 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6804 for (const auto *I : PreInit->decls()) { 6805 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6806 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6807 } else { 6808 CodeGenFunction::AutoVarEmission Emission = 6809 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6810 CGF.EmitAutoVarCleanups(Emission); 6811 } 6812 } 6813 } 6814 CondVal = CGF.EvaluateExprAsBool(Cond); 6815 } 6816 } 6817 } 6818 // Check the value of num_threads clause iff if clause was not specified 6819 // or is not evaluated to false. 
6820 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6821 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6822 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6823 const auto *NumThreadsClause = 6824 Dir->getSingleClause<OMPNumThreadsClause>(); 6825 CodeGenFunction::LexicalScope Scope( 6826 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6827 if (const auto *PreInit = 6828 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6829 for (const auto *I : PreInit->decls()) { 6830 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6831 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6832 } else { 6833 CodeGenFunction::AutoVarEmission Emission = 6834 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6835 CGF.EmitAutoVarCleanups(Emission); 6836 } 6837 } 6838 } 6839 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6840 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6841 /*isSigned=*/false); 6842 if (DefaultThreadLimitVal) 6843 NumThreads = CGF.Builder.CreateSelect( 6844 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6845 DefaultThreadLimitVal, NumThreads); 6846 } else { 6847 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6848 : CGF.Builder.getInt32(0); 6849 } 6850 // Process condition of the if clause. 6851 if (CondVal) { 6852 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6853 CGF.Builder.getInt32(1)); 6854 } 6855 return NumThreads; 6856 } 6857 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6858 return CGF.Builder.getInt32(1); 6859 return DefaultThreadLimitVal; 6860 } 6861 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6862 : CGF.Builder.getInt32(0); 6863 } 6864 6865 /// Emit the number of threads for a target directive. Inspect the 6866 /// thread_limit clause associated with a teams construct combined or closely 6867 /// nested with the target directive. 
6868 /// 6869 /// Emit the num_threads clause for directives such as 'target parallel' that 6870 /// have no associated teams construct. 6871 /// 6872 /// Otherwise, return nullptr. 6873 static llvm::Value * 6874 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6875 const OMPExecutableDirective &D) { 6876 assert(!CGF.getLangOpts().OpenMPIsDevice && 6877 "Clauses associated with the teams directive expected to be emitted " 6878 "only for the host!"); 6879 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6880 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6881 "Expected target-based executable directive."); 6882 CGBuilderTy &Bld = CGF.Builder; 6883 llvm::Value *ThreadLimitVal = nullptr; 6884 llvm::Value *NumThreadsVal = nullptr; 6885 switch (DirectiveKind) { 6886 case OMPD_target: { 6887 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6888 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6889 return NumThreads; 6890 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6891 CGF.getContext(), CS->getCapturedStmt()); 6892 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6893 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6894 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6895 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6896 const auto *ThreadLimitClause = 6897 Dir->getSingleClause<OMPThreadLimitClause>(); 6898 CodeGenFunction::LexicalScope Scope( 6899 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6900 if (const auto *PreInit = 6901 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6902 for (const auto *I : PreInit->decls()) { 6903 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6904 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6905 } else { 6906 CodeGenFunction::AutoVarEmission Emission = 6907 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6908 CGF.EmitAutoVarCleanups(Emission); 6909 } 6910 } 6911 } 6912 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6913 
ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6914 ThreadLimitVal = 6915 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6916 } 6917 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6918 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6919 CS = Dir->getInnermostCapturedStmt(); 6920 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6921 CGF.getContext(), CS->getCapturedStmt()); 6922 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6923 } 6924 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6925 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6926 CS = Dir->getInnermostCapturedStmt(); 6927 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6928 return NumThreads; 6929 } 6930 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6931 return Bld.getInt32(1); 6932 } 6933 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6934 } 6935 case OMPD_target_teams: { 6936 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6937 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6938 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6939 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6940 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6941 ThreadLimitVal = 6942 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6943 } 6944 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6945 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6946 return NumThreads; 6947 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6948 CGF.getContext(), CS->getCapturedStmt()); 6949 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6950 if (Dir->getDirectiveKind() == OMPD_distribute) { 6951 CS = Dir->getInnermostCapturedStmt(); 6952 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6953 return NumThreads; 6954 } 6955 } 6956 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6957 } 6958 case OMPD_target_teams_distribute: 6959 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6960 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6961 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6962 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6963 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6964 ThreadLimitVal = 6965 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6966 } 6967 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6968 case OMPD_target_parallel: 6969 case OMPD_target_parallel_for: 6970 case OMPD_target_parallel_for_simd: 6971 case OMPD_target_teams_distribute_parallel_for: 6972 case OMPD_target_teams_distribute_parallel_for_simd: { 6973 llvm::Value *CondVal = nullptr; 6974 // Handle if clause. If if clause present, the number of threads is 6975 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6976 if (D.hasClausesOfKind<OMPIfClause>()) { 6977 const OMPIfClause *IfClause = nullptr; 6978 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6979 if (C->getNameModifier() == OMPD_unknown || 6980 C->getNameModifier() == OMPD_parallel) { 6981 IfClause = C; 6982 break; 6983 } 6984 } 6985 if (IfClause) { 6986 const Expr *Cond = IfClause->getCondition(); 6987 bool Result; 6988 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6989 if (!Result) 6990 return Bld.getInt32(1); 6991 } else { 6992 CodeGenFunction::RunCleanupsScope Scope(CGF); 6993 CondVal = CGF.EvaluateExprAsBool(Cond); 6994 } 6995 } 6996 } 6997 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6998 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6999 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7000 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7001 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7002 ThreadLimitVal = 7003 Bld.CreateIntCast(ThreadLimit, 
CGF.Int32Ty, /*isSigned=*/false); 7004 } 7005 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7006 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7007 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7008 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7009 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7010 NumThreadsVal = 7011 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7012 ThreadLimitVal = ThreadLimitVal 7013 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7014 ThreadLimitVal), 7015 NumThreadsVal, ThreadLimitVal) 7016 : NumThreadsVal; 7017 } 7018 if (!ThreadLimitVal) 7019 ThreadLimitVal = Bld.getInt32(0); 7020 if (CondVal) 7021 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7022 return ThreadLimitVal; 7023 } 7024 case OMPD_target_teams_distribute_simd: 7025 case OMPD_target_simd: 7026 return Bld.getInt32(1); 7027 case OMPD_parallel: 7028 case OMPD_for: 7029 case OMPD_parallel_for: 7030 case OMPD_parallel_sections: 7031 case OMPD_for_simd: 7032 case OMPD_parallel_for_simd: 7033 case OMPD_cancel: 7034 case OMPD_cancellation_point: 7035 case OMPD_ordered: 7036 case OMPD_threadprivate: 7037 case OMPD_allocate: 7038 case OMPD_task: 7039 case OMPD_simd: 7040 case OMPD_sections: 7041 case OMPD_section: 7042 case OMPD_single: 7043 case OMPD_master: 7044 case OMPD_critical: 7045 case OMPD_taskyield: 7046 case OMPD_barrier: 7047 case OMPD_taskwait: 7048 case OMPD_taskgroup: 7049 case OMPD_atomic: 7050 case OMPD_flush: 7051 case OMPD_teams: 7052 case OMPD_target_data: 7053 case OMPD_target_exit_data: 7054 case OMPD_target_enter_data: 7055 case OMPD_distribute: 7056 case OMPD_distribute_simd: 7057 case OMPD_distribute_parallel_for: 7058 case OMPD_distribute_parallel_for_simd: 7059 case OMPD_teams_distribute: 7060 case OMPD_teams_distribute_simd: 7061 case OMPD_teams_distribute_parallel_for: 7062 case OMPD_teams_distribute_parallel_for_simd: 7063 case OMPD_target_update: 7064 case 
OMPD_declare_simd: 7065 case OMPD_declare_target: 7066 case OMPD_end_declare_target: 7067 case OMPD_declare_reduction: 7068 case OMPD_declare_mapper: 7069 case OMPD_taskloop: 7070 case OMPD_taskloop_simd: 7071 case OMPD_requires: 7072 case OMPD_unknown: 7073 break; 7074 } 7075 llvm_unreachable("Unsupported directive kind."); 7076 } 7077 7078 namespace { 7079 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7080 7081 // Utility to handle information from clauses associated with a given 7082 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7083 // It provides a convenient interface to obtain the information and generate 7084 // code for that information. 7085 class MappableExprsHandler { 7086 public: 7087 /// Values for bit flags used to specify the mapping type for 7088 /// offloading. 7089 enum OpenMPOffloadMappingFlags : uint64_t { 7090 /// No flags 7091 OMP_MAP_NONE = 0x0, 7092 /// Allocate memory on the device and move data from host to device. 7093 OMP_MAP_TO = 0x01, 7094 /// Allocate memory on the device and move data from device to host. 7095 OMP_MAP_FROM = 0x02, 7096 /// Always perform the requested mapping action on the element, even 7097 /// if it was already mapped before. 7098 OMP_MAP_ALWAYS = 0x04, 7099 /// Delete the element from the device environment, ignoring the 7100 /// current reference count associated with the element. 7101 OMP_MAP_DELETE = 0x08, 7102 /// The element being mapped is a pointer-pointee pair; both the 7103 /// pointer and the pointee should be mapped. 7104 OMP_MAP_PTR_AND_OBJ = 0x10, 7105 /// This flags signals that the base address of an entry should be 7106 /// passed to the target kernel as an argument. 7107 OMP_MAP_TARGET_PARAM = 0x20, 7108 /// Signal that the runtime library has to return the device pointer 7109 /// in the current position for the data being mapped. Used when we have the 7110 /// use_device_ptr clause. 
7111 OMP_MAP_RETURN_PARAM = 0x40, 7112 /// This flag signals that the reference being passed is a pointer to 7113 /// private data. 7114 OMP_MAP_PRIVATE = 0x80, 7115 /// Pass the element to the device by value. 7116 OMP_MAP_LITERAL = 0x100, 7117 /// Implicit map 7118 OMP_MAP_IMPLICIT = 0x200, 7119 /// Close is a hint to the runtime to allocate memory close to 7120 /// the target device. 7121 OMP_MAP_CLOSE = 0x400, 7122 /// The 16 MSBs of the flags indicate whether the entry is member of some 7123 /// struct/class. 7124 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7125 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7126 }; 7127 7128 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7129 static unsigned getFlagMemberOffset() { 7130 unsigned Offset = 0; 7131 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7132 Remain = Remain >> 1) 7133 Offset++; 7134 return Offset; 7135 } 7136 7137 /// Class that associates information with a base pointer to be passed to the 7138 /// runtime library. 7139 class BasePointerInfo { 7140 /// The base pointer. 7141 llvm::Value *Ptr = nullptr; 7142 /// The base declaration that refers to this device pointer, or null if 7143 /// there is none. 7144 const ValueDecl *DevPtrDecl = nullptr; 7145 7146 public: 7147 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7148 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7149 llvm::Value *operator*() const { return Ptr; } 7150 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7151 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7152 }; 7153 7154 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7155 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7156 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7157 7158 /// Map between a struct and the its lowest & highest elements which have been 7159 /// mapped. 
7160 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7161 /// HE(FieldIndex, Pointer)} 7162 struct StructRangeInfoTy { 7163 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7164 0, Address::invalid()}; 7165 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7166 0, Address::invalid()}; 7167 Address Base = Address::invalid(); 7168 }; 7169 7170 private: 7171 /// Kind that defines how a device pointer has to be returned. 7172 struct MapInfo { 7173 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7174 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7175 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7176 bool ReturnDevicePointer = false; 7177 bool IsImplicit = false; 7178 7179 MapInfo() = default; 7180 MapInfo( 7181 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7182 OpenMPMapClauseKind MapType, 7183 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7184 bool ReturnDevicePointer, bool IsImplicit) 7185 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7186 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7187 }; 7188 7189 /// If use_device_ptr is used on a pointer which is a struct member and there 7190 /// is no map information about it, then emission of that entry is deferred 7191 /// until the whole struct has been processed. 7192 struct DeferredDevicePtrEntryTy { 7193 const Expr *IE = nullptr; 7194 const ValueDecl *VD = nullptr; 7195 7196 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7197 : IE(IE), VD(VD) {} 7198 }; 7199 7200 /// The target directive from where the mappable clauses were extracted. It 7201 /// is either a executable directive or a user-defined mapper directive. 7202 llvm::PointerUnion<const OMPExecutableDirective *, 7203 const OMPDeclareMapperDecl *> 7204 CurDir; 7205 7206 /// Function the directive is being generated for. 
7207 CodeGenFunction &CGF; 7208 7209 /// Set of all first private variables in the current directive. 7210 /// bool data is set to true if the variable is implicitly marked as 7211 /// firstprivate, false otherwise. 7212 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7213 7214 /// Map between device pointer declarations and their expression components. 7215 /// The key value for declarations in 'this' is null. 7216 llvm::DenseMap< 7217 const ValueDecl *, 7218 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7219 DevPointersMap; 7220 7221 llvm::Value *getExprTypeSize(const Expr *E) const { 7222 QualType ExprTy = E->getType().getCanonicalType(); 7223 7224 // Reference types are ignored for mapping purposes. 7225 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7226 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7227 7228 // Given that an array section is considered a built-in type, we need to 7229 // do the calculation based on the length of the section instead of relying 7230 // on CGF.getTypeSize(E->getType()). 7231 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7232 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7233 OAE->getBase()->IgnoreParenImpCasts()) 7234 .getCanonicalType(); 7235 7236 // If there is no length associated with the expression, that means we 7237 // are using the whole length of the base. 
7238 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 7239 return CGF.getTypeSize(BaseTy); 7240 7241 llvm::Value *ElemSize; 7242 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7243 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7244 } else { 7245 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7246 assert(ATy && "Expecting array type if not a pointer type."); 7247 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7248 } 7249 7250 // If we don't have a length at this point, that is because we have an 7251 // array section with a single element. 7252 if (!OAE->getLength()) 7253 return ElemSize; 7254 7255 llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 7256 LengthVal = 7257 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 7258 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7259 } 7260 return CGF.getTypeSize(ExprTy); 7261 } 7262 7263 /// Return the corresponding bits for a given map clause modifier. Add 7264 /// a flag marking the map as a pointer if requested. Add a flag marking the 7265 /// map as the first one of a series of maps that relate to the same map 7266 /// expression. 7267 OpenMPOffloadMappingFlags getMapTypeBits( 7268 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7269 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7270 OpenMPOffloadMappingFlags Bits = 7271 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7272 switch (MapType) { 7273 case OMPC_MAP_alloc: 7274 case OMPC_MAP_release: 7275 // alloc and release is the default behavior in the runtime library, i.e. 7276 // if we don't pass any bits alloc/release that is what the runtime is 7277 // going to do. Therefore, we don't need to signal anything for these two 7278 // type modifiers. 
7279 break; 7280 case OMPC_MAP_to: 7281 Bits |= OMP_MAP_TO; 7282 break; 7283 case OMPC_MAP_from: 7284 Bits |= OMP_MAP_FROM; 7285 break; 7286 case OMPC_MAP_tofrom: 7287 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7288 break; 7289 case OMPC_MAP_delete: 7290 Bits |= OMP_MAP_DELETE; 7291 break; 7292 case OMPC_MAP_unknown: 7293 llvm_unreachable("Unexpected map type!"); 7294 } 7295 if (AddPtrFlag) 7296 Bits |= OMP_MAP_PTR_AND_OBJ; 7297 if (AddIsTargetParamFlag) 7298 Bits |= OMP_MAP_TARGET_PARAM; 7299 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7300 != MapModifiers.end()) 7301 Bits |= OMP_MAP_ALWAYS; 7302 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7303 != MapModifiers.end()) 7304 Bits |= OMP_MAP_CLOSE; 7305 return Bits; 7306 } 7307 7308 /// Return true if the provided expression is a final array section. A 7309 /// final array section, is one whose length can't be proved to be one. 7310 bool isFinalArraySectionExpression(const Expr *E) const { 7311 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7312 7313 // It is not an array section and therefore not a unity-size one. 7314 if (!OASE) 7315 return false; 7316 7317 // An array section with no colon always refer to a single element. 7318 if (OASE->getColonLoc().isInvalid()) 7319 return false; 7320 7321 const Expr *Length = OASE->getLength(); 7322 7323 // If we don't have a length we have to check if the array has size 1 7324 // for this dimension. Also, we should always expect a length if the 7325 // base type is pointer. 7326 if (!Length) { 7327 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7328 OASE->getBase()->IgnoreParenImpCasts()) 7329 .getCanonicalType(); 7330 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7331 return ATy->getSize().getSExtValue() != 1; 7332 // If we don't have a constant dimension length, we have to consider 7333 // the current section as having any size, so it is not necessarily 7334 // unitary. 
If it happen to be unity size, that's user fault. 7335 return true; 7336 } 7337 7338 // Check if the length evaluates to 1. 7339 Expr::EvalResult Result; 7340 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7341 return true; // Can have more that size 1. 7342 7343 llvm::APSInt ConstLength = Result.Val.getInt(); 7344 return ConstLength.getSExtValue() != 1; 7345 } 7346 7347 /// Generate the base pointers, section pointers, sizes and map type 7348 /// bits for the provided map type, map modifier, and expression components. 7349 /// \a IsFirstComponent should be set to true if the provided set of 7350 /// components is the first associated with a capture. 7351 void generateInfoForComponentList( 7352 OpenMPMapClauseKind MapType, 7353 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7354 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7355 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7356 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7357 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7358 bool IsImplicit, 7359 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7360 OverlappedElements = llvm::None) const { 7361 // The following summarizes what has to be generated for each map and the 7362 // types below. The generated information is expressed in this order: 7363 // base pointer, section pointer, size, flags 7364 // (to add to the ones that come from the map type and modifier). 
7365 // 7366 // double d; 7367 // int i[100]; 7368 // float *p; 7369 // 7370 // struct S1 { 7371 // int i; 7372 // float f[50]; 7373 // } 7374 // struct S2 { 7375 // int i; 7376 // float f[50]; 7377 // S1 s; 7378 // double *p; 7379 // struct S2 *ps; 7380 // } 7381 // S2 s; 7382 // S2 *ps; 7383 // 7384 // map(d) 7385 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7386 // 7387 // map(i) 7388 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7389 // 7390 // map(i[1:23]) 7391 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7392 // 7393 // map(p) 7394 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7395 // 7396 // map(p[1:24]) 7397 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7398 // 7399 // map(s) 7400 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7401 // 7402 // map(s.i) 7403 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7404 // 7405 // map(s.s.f) 7406 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7407 // 7408 // map(s.p) 7409 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7410 // 7411 // map(to: s.p[:22]) 7412 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7413 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7414 // &(s.p), &(s.p[0]), 22*sizeof(double), 7415 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7416 // (*) alloc space for struct members, only this is a target parameter 7417 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7418 // optimizes this entry out, same in the examples below) 7419 // (***) map the pointee (map: to) 7420 // 7421 // map(s.ps) 7422 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7423 // 7424 // map(from: s.ps->s.i) 7425 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7426 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7427 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7428 // 7429 // map(to: s.ps->ps) 7430 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7431 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7432 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7433 // 7434 // map(s.ps->ps->ps) 7435 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7436 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7437 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7438 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7439 // 7440 // map(to: s.ps->ps->s.f[:22]) 7441 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7442 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7443 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7444 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7445 // 7446 // map(ps) 7447 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7448 // 7449 // map(ps->i) 7450 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7451 // 7452 // map(ps->s.f) 7453 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7454 // 7455 // map(from: ps->p) 7456 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7457 // 7458 // map(to: ps->p[:22]) 7459 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7460 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7461 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7462 // 7463 // map(ps->ps) 7464 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7465 // 7466 // map(from: ps->ps->s.i) 7467 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7468 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7469 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7470 // 7471 // map(from: ps->ps->ps) 7472 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7473 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7474 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7475 // 7476 // map(ps->ps->ps->ps) 7477 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7478 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7479 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7480 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7481 // 7482 // map(to: ps->ps->ps->s.f[:22]) 7483 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7484 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7485 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7486 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7487 // 7488 // map(to: s.f[:22]) map(from: s.p[:33]) 7489 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7490 // sizeof(double*) (**), TARGET_PARAM 7491 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7492 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7493 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7494 // (*) allocate contiguous space needed to fit all mapped members even if 7495 // we allocate space for members not mapped (in this example, 7496 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7497 // them as well because they fall between &s.f[0] and &s.p) 7498 // 7499 // map(from: s.f[:22]) map(to: ps->p[:33]) 7500 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7501 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7502 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7503 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7504 // (*) the struct this entry pertains to is the 2nd element in the list of 7505 // arguments, hence MEMBER_OF(2) 7506 // 7507 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7508 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7509 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7510 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7511 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7512 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7513 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7514 // (*) the struct this entry pertains to is the 4th element in the list 7515 // of arguments, hence MEMBER_OF(4) 7516 7517 // Track if the map information being generated is the first for a 
capture. 7518 bool IsCaptureFirstInfo = IsFirstComponentList; 7519 // When the variable is on a declare target link or in a to clause with 7520 // unified memory, a reference is needed to hold the host/device address 7521 // of the variable. 7522 bool RequiresReference = false; 7523 7524 // Scan the components from the base to the complete expression. 7525 auto CI = Components.rbegin(); 7526 auto CE = Components.rend(); 7527 auto I = CI; 7528 7529 // Track if the map information being generated is the first for a list of 7530 // components. 7531 bool IsExpressionFirstInfo = true; 7532 Address BP = Address::invalid(); 7533 const Expr *AssocExpr = I->getAssociatedExpression(); 7534 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7535 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7536 7537 if (isa<MemberExpr>(AssocExpr)) { 7538 // The base is the 'this' pointer. The content of the pointer is going 7539 // to be the base of the field being mapped. 7540 BP = CGF.LoadCXXThisAddress(); 7541 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7542 (OASE && 7543 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7544 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7545 } else { 7546 // The base is the reference to the variable. 7547 // BP = &Var. 7548 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7549 if (const auto *VD = 7550 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7551 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7552 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7553 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7554 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7555 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7556 RequiresReference = true; 7557 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7558 } 7559 } 7560 } 7561 7562 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7563 // the last component), the base has to be the pointer itself, not its 7564 // reference. References are ignored for mapping purposes. 7565 QualType Ty = 7566 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7567 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7568 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7569 7570 // We do not need to generate individual map information for the 7571 // pointer, it can be associated with the combined storage. 7572 ++I; 7573 } 7574 } 7575 7576 // Track whether a component of the list should be marked as MEMBER_OF some 7577 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7578 // in a component list should be marked as MEMBER_OF, all subsequent entries 7579 // do not belong to the base struct. E.g. 7580 // struct S2 s; 7581 // s.ps->ps->ps->f[:] 7582 // (1) (2) (3) (4) 7583 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7584 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7585 // is the pointee of ps(2) which is not member of struct s, so it should not 7586 // be marked as such (it is still PTR_AND_OBJ). 7587 // The variable is initialized to false so that PTR_AND_OBJ entries which 7588 // are not struct members are not considered (e.g. array of pointers to 7589 // data). 7590 bool ShouldBeMemberOf = false; 7591 7592 // Variable keeping track of whether or not we have encountered a component 7593 // in the component list which is a member expression. Useful when we have a 7594 // pointer or a final array section, in which case it is the previous 7595 // component in the list which tells us whether we have a member expression. 7596 // E.g. X.f[:] 7597 // While processing the final array section "[:]" it is "f" which tells us 7598 // whether we are dealing with a member of a declared struct. 
7599 const MemberExpr *EncounteredME = nullptr; 7600 7601 for (; I != CE; ++I) { 7602 // If the current component is member of a struct (parent struct) mark it. 7603 if (!EncounteredME) { 7604 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7605 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7606 // as MEMBER_OF the parent struct. 7607 if (EncounteredME) 7608 ShouldBeMemberOf = true; 7609 } 7610 7611 auto Next = std::next(I); 7612 7613 // We need to generate the addresses and sizes if this is the last 7614 // component, if the component is a pointer or if it is an array section 7615 // whose length can't be proved to be one. If this is a pointer, it 7616 // becomes the base address for the following components. 7617 7618 // A final array section, is one whose length can't be proved to be one. 7619 bool IsFinalArraySection = 7620 isFinalArraySectionExpression(I->getAssociatedExpression()); 7621 7622 // Get information on whether the element is a pointer. Have to do a 7623 // special treatment for array sections given that they are built-in 7624 // types. 7625 const auto *OASE = 7626 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7627 bool IsPointer = 7628 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7629 .getCanonicalType() 7630 ->isAnyPointerType()) || 7631 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7632 7633 if (Next == CE || IsPointer || IsFinalArraySection) { 7634 // If this is not the last component, we expect the pointer to be 7635 // associated with an array expression or member expression. 
7636 assert((Next == CE || 7637 isa<MemberExpr>(Next->getAssociatedExpression()) || 7638 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7639 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7640 "Unexpected expression"); 7641 7642 Address LB = 7643 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 7644 7645 // If this component is a pointer inside the base struct then we don't 7646 // need to create any entry for it - it will be combined with the object 7647 // it is pointing to into a single PTR_AND_OBJ entry. 7648 bool IsMemberPointer = 7649 IsPointer && EncounteredME && 7650 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7651 EncounteredME); 7652 if (!OverlappedElements.empty()) { 7653 // Handle base element with the info for overlapped elements. 7654 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7655 assert(Next == CE && 7656 "Expected last element for the overlapped elements."); 7657 assert(!IsPointer && 7658 "Unexpected base element with the pointer type."); 7659 // Mark the whole struct as the struct that requires allocation on the 7660 // device. 7661 PartialStruct.LowestElem = {0, LB}; 7662 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7663 I->getAssociatedExpression()->getType()); 7664 Address HB = CGF.Builder.CreateConstGEP( 7665 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7666 CGF.VoidPtrTy), 7667 TypeSize.getQuantity() - 1); 7668 PartialStruct.HighestElem = { 7669 std::numeric_limits<decltype( 7670 PartialStruct.HighestElem.first)>::max(), 7671 HB}; 7672 PartialStruct.Base = BP; 7673 // Emit data for non-overlapped data. 7674 OpenMPOffloadMappingFlags Flags = 7675 OMP_MAP_MEMBER_OF | 7676 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7677 /*AddPtrFlag=*/false, 7678 /*AddIsTargetParamFlag=*/false); 7679 LB = BP; 7680 llvm::Value *Size = nullptr; 7681 // Do bitcopy of all non-overlapped structure elements. 
7682 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7683 Component : OverlappedElements) { 7684 Address ComponentLB = Address::invalid(); 7685 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7686 Component) { 7687 if (MC.getAssociatedDeclaration()) { 7688 ComponentLB = 7689 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7690 .getAddress(); 7691 Size = CGF.Builder.CreatePtrDiff( 7692 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7693 CGF.EmitCastToVoidPtr(LB.getPointer())); 7694 break; 7695 } 7696 } 7697 BasePointers.push_back(BP.getPointer()); 7698 Pointers.push_back(LB.getPointer()); 7699 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7700 /*isSigned=*/true)); 7701 Types.push_back(Flags); 7702 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7703 } 7704 BasePointers.push_back(BP.getPointer()); 7705 Pointers.push_back(LB.getPointer()); 7706 Size = CGF.Builder.CreatePtrDiff( 7707 CGF.EmitCastToVoidPtr( 7708 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7709 CGF.EmitCastToVoidPtr(LB.getPointer())); 7710 Sizes.push_back( 7711 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7712 Types.push_back(Flags); 7713 break; 7714 } 7715 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7716 if (!IsMemberPointer) { 7717 BasePointers.push_back(BP.getPointer()); 7718 Pointers.push_back(LB.getPointer()); 7719 Sizes.push_back( 7720 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7721 7722 // We need to add a pointer flag for each map that comes from the 7723 // same expression except for the first one. We also need to signal 7724 // this map is the first one that relates with the current capture 7725 // (there is a set of entries for each capture). 
7726 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7727 MapType, MapModifiers, IsImplicit, 7728 !IsExpressionFirstInfo || RequiresReference, 7729 IsCaptureFirstInfo && !RequiresReference); 7730 7731 if (!IsExpressionFirstInfo) { 7732 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7733 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7734 if (IsPointer) 7735 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7736 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7737 7738 if (ShouldBeMemberOf) { 7739 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7740 // should be later updated with the correct value of MEMBER_OF. 7741 Flags |= OMP_MAP_MEMBER_OF; 7742 // From now on, all subsequent PTR_AND_OBJ entries should not be 7743 // marked as MEMBER_OF. 7744 ShouldBeMemberOf = false; 7745 } 7746 } 7747 7748 Types.push_back(Flags); 7749 } 7750 7751 // If we have encountered a member expression so far, keep track of the 7752 // mapped member. If the parent is "*this", then the value declaration 7753 // is nullptr. 7754 if (EncounteredME) { 7755 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7756 unsigned FieldIndex = FD->getFieldIndex(); 7757 7758 // Update info about the lowest and highest elements for this struct 7759 if (!PartialStruct.Base.isValid()) { 7760 PartialStruct.LowestElem = {FieldIndex, LB}; 7761 PartialStruct.HighestElem = {FieldIndex, LB}; 7762 PartialStruct.Base = BP; 7763 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7764 PartialStruct.LowestElem = {FieldIndex, LB}; 7765 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7766 PartialStruct.HighestElem = {FieldIndex, LB}; 7767 } 7768 } 7769 7770 // If we have a final array section, we are done with this expression. 7771 if (IsFinalArraySection) 7772 break; 7773 7774 // The pointer becomes the base for the next element. 
7775 if (Next != CE) 7776 BP = LB; 7777 7778 IsExpressionFirstInfo = false; 7779 IsCaptureFirstInfo = false; 7780 } 7781 } 7782 } 7783 7784 /// Return the adjusted map modifiers if the declaration a capture refers to 7785 /// appears in a first-private clause. This is expected to be used only with 7786 /// directives that start with 'target'. 7787 MappableExprsHandler::OpenMPOffloadMappingFlags 7788 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7789 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7790 7791 // A first private variable captured by reference will use only the 7792 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7793 // declaration is known as first-private in this handler. 7794 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7795 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7796 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7797 return MappableExprsHandler::OMP_MAP_ALWAYS | 7798 MappableExprsHandler::OMP_MAP_TO; 7799 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7800 return MappableExprsHandler::OMP_MAP_TO | 7801 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7802 return MappableExprsHandler::OMP_MAP_PRIVATE | 7803 MappableExprsHandler::OMP_MAP_TO; 7804 } 7805 return MappableExprsHandler::OMP_MAP_TO | 7806 MappableExprsHandler::OMP_MAP_FROM; 7807 } 7808 7809 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7810 // Rotate by getFlagMemberOffset() bits. 7811 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7812 << getFlagMemberOffset()); 7813 } 7814 7815 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7816 OpenMPOffloadMappingFlags MemberOfFlag) { 7817 // If the entry is PTR_AND_OBJ but has not been marked with the special 7818 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7819 // marked as MEMBER_OF. 
7820 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7821 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7822 return; 7823 7824 // Reset the placeholder value to prepare the flag for the assignment of the 7825 // proper MEMBER_OF value. 7826 Flags &= ~OMP_MAP_MEMBER_OF; 7827 Flags |= MemberOfFlag; 7828 } 7829 7830 void getPlainLayout(const CXXRecordDecl *RD, 7831 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7832 bool AsBase) const { 7833 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7834 7835 llvm::StructType *St = 7836 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7837 7838 unsigned NumElements = St->getNumElements(); 7839 llvm::SmallVector< 7840 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7841 RecordLayout(NumElements); 7842 7843 // Fill bases. 7844 for (const auto &I : RD->bases()) { 7845 if (I.isVirtual()) 7846 continue; 7847 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7848 // Ignore empty bases. 7849 if (Base->isEmpty() || CGF.getContext() 7850 .getASTRecordLayout(Base) 7851 .getNonVirtualSize() 7852 .isZero()) 7853 continue; 7854 7855 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7856 RecordLayout[FieldIndex] = Base; 7857 } 7858 // Fill in virtual bases. 7859 for (const auto &I : RD->vbases()) { 7860 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7861 // Ignore empty bases. 7862 if (Base->isEmpty()) 7863 continue; 7864 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7865 if (RecordLayout[FieldIndex]) 7866 continue; 7867 RecordLayout[FieldIndex] = Base; 7868 } 7869 // Fill in all the fields. 7870 assert(!RD->isUnion() && "Unexpected union."); 7871 for (const auto *Field : RD->fields()) { 7872 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7873 // will fill in later.) 
7874 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7875 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7876 RecordLayout[FieldIndex] = Field; 7877 } 7878 } 7879 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7880 &Data : RecordLayout) { 7881 if (Data.isNull()) 7882 continue; 7883 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7884 getPlainLayout(Base, Layout, /*AsBase=*/true); 7885 else 7886 Layout.push_back(Data.get<const FieldDecl *>()); 7887 } 7888 } 7889 7890 public: 7891 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7892 : CurDir(&Dir), CGF(CGF) { 7893 // Extract firstprivate clause information. 7894 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7895 for (const auto *D : C->varlists()) 7896 FirstPrivateDecls.try_emplace( 7897 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7898 // Extract device pointer clause information. 7899 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7900 for (auto L : C->component_lists()) 7901 DevPointersMap[L.first].push_back(L.second); 7902 } 7903 7904 /// Constructor for the declare mapper directive. 7905 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7906 : CurDir(&Dir), CGF(CGF) {} 7907 7908 /// Generate code for the combined entry if we have a partially mapped struct 7909 /// and take care of the mapping flags of the arguments corresponding to 7910 /// individual struct members. 
7911 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7912 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7913 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7914 const StructRangeInfoTy &PartialStruct) const { 7915 // Base is the base of the struct 7916 BasePointers.push_back(PartialStruct.Base.getPointer()); 7917 // Pointer is the address of the lowest element 7918 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7919 Pointers.push_back(LB); 7920 // Size is (addr of {highest+1} element) - (addr of lowest element) 7921 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7922 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7923 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7924 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7925 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7926 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7927 /*isSigned=*/false); 7928 Sizes.push_back(Size); 7929 // Map type is always TARGET_PARAM 7930 Types.push_back(OMP_MAP_TARGET_PARAM); 7931 // Remove TARGET_PARAM flag from the first element 7932 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7933 7934 // All other current entries will be MEMBER_OF the combined entry 7935 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7936 // 0xFFFF in the MEMBER_OF field). 7937 OpenMPOffloadMappingFlags MemberOfFlag = 7938 getMemberOfFlag(BasePointers.size() - 1); 7939 for (auto &M : CurTypes) 7940 setCorrectMemberOfFlag(M, MemberOfFlag); 7941 } 7942 7943 /// Generate all the base pointers, section pointers, sizes and map 7944 /// types for the extracted mappable expressions. Also, for each item that 7945 /// relates with a device pointer, a pair of the relevant declaration and 7946 /// index where it occurs is appended to the device pointers info array. 
void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                     MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                     MapFlagsArrayTy &Types) const {
  // We have to process the component lists that relate to the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. Entries are keyed by the canonical declaration; a null
  // declaration is kept as the null key.
  auto &&InfoGen = [&Info](
      const ValueDecl *D,
      OMPClauseMappableExprCommon::MappableExprComponentListRef L,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit) {
    const ValueDecl *VD =
        D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
    Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                          IsImplicit);
  };

  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // Record the component lists of all 'map' clauses with their own map type
  // and modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
    for (const auto &L : C->component_lists()) {
      InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }
  // 'to' clauses are recorded as 'to' maps without modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
    for (const auto &L : C->component_lists()) {
      InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }
  // 'from' clauses are recorded as 'from' maps without modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
    for (const auto &L : C->component_lists()) {
      InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;

  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
    for (const auto &L : C->component_lists()) {
      assert(!L.second.empty() && "Not expecting empty list of components!");
      const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = L.second.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto CI = std::find_if(
            It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        if (CI != It->second.end()) {
          CI->ReturnDevicePointer = true;
          continue;
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
        DeferredInfo[nullptr].emplace_back(IE, VD);
      } else {
        // Not a struct member: load the pointer value and emit it right away
        // as a zero-size entry used both as base pointer and as pointer.
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        BasePointers.emplace_back(Ptr, VD);
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
      }
    }
  }

  // Process the collected component lists one declaration at a time, in the
  // order in which the declarations were first inserted into Info.
  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary versions of arrays
    MapBaseValuesArrayTy CurBasePointers;
    MapValuesArrayTy CurPointers;
    MapValuesArrayTy CurSizes;
    MapFlagsArrayTy CurTypes;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");

      // Remember the current base pointer index.
      unsigned CurrentBasePointersIdx = CurBasePointers.size();
      generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                   CurBasePointers, CurPointers, CurSizes,
                                   CurTypes, PartialStruct,
                                   IsFirstComponentList, L.IsImplicit);

      // If this entry relates to a device pointer, set the relevant
      // declaration and add the 'return pointer' flag.
      if (L.ReturnDevicePointer) {
        assert(CurBasePointers.size() > CurrentBasePointersIdx &&
               "Unexpected number of mapped base pointers.");

        const ValueDecl *RelevantVD =
            L.Components.back().getAssociatedDeclaration();
        assert(RelevantVD &&
               "No relevant declaration related with device pointer??");

        CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
        CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
      }
      IsFirstComponentList = false;
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr.
    auto CI = DeferredInfo.find(M.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        // The base pointer is the address of the member itself; the pointer
        // is the value loaded from it.
        llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
            this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
        CurBasePointers.emplace_back(BasePtr, L.VD);
        CurPointers.push_back(Ptr);
        CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
        // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
        // value MEMBER_OF=FFFF so that the entry is later updated with the
        // correct value of MEMBER_OF.
        CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                           OMP_MAP_MEMBER_OF);
      }
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                        PartialStruct);

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    Types.append(CurTypes.begin(), CurTypes.end());
  }
}

/// Generate all the base pointers, section pointers, sizes and map types for
/// the extracted map clauses of a user-defined mapper. Requires the handler
/// to have been constructed for a declare mapper directive. The results are
/// appended to \a BasePointers, \a Pointers, \a Sizes and \a Types.
void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  // We have to process the component lists that relate to the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. Entries are keyed by the canonical declaration; a null
  // declaration is kept as the null key.
  auto &&InfoGen = [&Info](
      const ValueDecl *D,
      OMPClauseMappableExprCommon::MappableExprComponentListRef L,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit) {
    const ValueDecl *VD =
        D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
    Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                          IsImplicit);
  };

  // Every clause of the mapper is treated as a map clause (cast<> asserts
  // this in builds with assertions enabled).
  for (const auto *C : CurMapperDir->clauselists()) {
    const auto *MC = cast<OMPMapClause>(C);
    for (const auto &L : MC->component_lists()) {
      InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
              /*ReturnDevicePointer=*/false, MC->isImplicit());
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary versions of arrays
    MapBaseValuesArrayTy CurBasePointers;
    MapValuesArrayTy CurPointers;
    MapValuesArrayTy CurSizes;
    MapFlagsArrayTy CurTypes;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");
      generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                   CurBasePointers, CurPointers, CurSizes,
                                   CurTypes, PartialStruct,
                                   IsFirstComponentList, L.IsImplicit);
      IsFirstComponentList = false;
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                        PartialStruct);

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    Types.append(CurTypes.begin(), CurTypes.end());
  }
}

/// Emit capture info for lambdas for variables captured by reference.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
    MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
    MapFlagsArrayTy &Types,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  // Nothing to do unless the (non-reference) type of VD is a lambda class.
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  // Treat Arg as the address of the lambda object.
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  // Captured 'this': the base pointer is the lambda field, the size is that
  // of a void pointer. LambdaPointers records which lambda object the field
  // belongs to so that adjustMemberOfForLambdaCaptures can find the parent.
  if (ThisCapture) {
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
    BasePointers.push_back(ThisLVal.getPointer());
    Pointers.push_back(ThisLValVal.getPointer());
    Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    // Note: this VD is the captured variable and shadows the parameter VD.
    const VarDecl *VD = LC.getCapturedVar();
    // Only by-reference captures and captured pointers produce an entry.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the referenced variable's
      // non-reference type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarLValVal.getPointer());
      Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer captured by copy: the pointer entry is the loaded pointer
      // value and the size is zero.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarRVal.getScalarVal());
      Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
}

/// Set correct indices for lambdas captures.
/// For every entry whose map type is exactly the flag combination pushed by
/// generateInfoForLambdaCaptures, replace the MEMBER_OF placeholder with the
/// index of the closest preceding entry whose pointer is the parent lambda
/// object recorded in \a LambdaPointers.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    // Search backwards from the entry just before I for the parent lambda.
    int TgtIdx = -1;
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // Make this capture a MEMBER_OF the entry that maps its parent lambda.
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg,
                            MapBaseValuesArrayTy &BasePointers,
                            MapValuesArrayTy &Pointers,
                            MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // A captured 'this' is represented by a null declaration; any other
  // capture by the canonical declaration of the captured variable.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    BasePointers.emplace_back(Arg, VD);
    Pointers.push_back(Arg);
    Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
    return;
  }

  // Collect (components, map type, map modifiers, is-implicit) for every
  // component list that the map clauses associate with this declaration.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    for (const auto &L : C->decl_component_lists(VD)) {
      assert(L.first == VD &&
             "We got information for the wrong declaration??");
      assert(!L.second.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit());
    }
  }

  // Find overlapping elements (including the offset from the base element).
  // The map is keyed by the address of the base entry inside
  // DeclComponentLists, which is not modified from here on.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ++Count;
    // Compare L only against the lists that come after it.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      // Only Components1 is used below; the other tied variables are merely
      // overwritten.
      std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
      // Walk both lists from their last component backwards while the
      // components agree.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head of
      // the components list.
      if (CI == CE || SI == SE) {
        assert((CI != CE || SI != SE) &&
               "Unexpected full match of the mapping components.");
        // The list that was exhausted is the base; the other one is recorded
        // as an element overlapping it.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  // Layout is the flattened field order of the captured record; it is only
  // needed (and only computed) when there is overlapped data to sort.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    if (const auto *CRD =
            VD->getType().getCanonicalType()->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          // Skip the common prefix, walking from the last component.
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // The lists diverge at two different fields: fields of the same
          // record are ordered by field index, otherwise by whichever
          // appears first in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          // NOTE(review): assumes at least one of FD1/FD2 is present in
          // Layout; otherwise It is the end iterator - confirm.
          const auto It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Go through all of the elements with the overlapped elements. Each of
  // them is emitted as a first component list, together with the lists
  // that overlap it.
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    bool IsFirstComponentList = true;
    generateInfoForComponentList(MapType, MapModifiers, Components,
                                 BasePointers, Pointers, Sizes, Types,
                                 PartialStruct, IsFirstComponentList,
                                 IsImplicit, OverlappedComponents);
  }
  // Go through other elements without overlapped elements.
  // The flag is cleared after the first list is visited, even when that list
  // has overlapped elements and is therefore skipped here.
  bool IsFirstComponentList = OverlappedData.empty();
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit);
    IsFirstComponentList = false;
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated with the declare target link variables.
8464 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8465 MapValuesArrayTy &Pointers, 8466 MapValuesArrayTy &Sizes, 8467 MapFlagsArrayTy &Types) const { 8468 assert(CurDir.is<const OMPExecutableDirective *>() && 8469 "Expect a executable directive"); 8470 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8471 // Map other list items in the map clause which are not captured variables 8472 // but "declare target link" global variables. 8473 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8474 for (const auto &L : C->component_lists()) { 8475 if (!L.first) 8476 continue; 8477 const auto *VD = dyn_cast<VarDecl>(L.first); 8478 if (!VD) 8479 continue; 8480 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8481 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8482 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8483 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8484 continue; 8485 StructRangeInfoTy PartialStruct; 8486 generateInfoForComponentList( 8487 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8488 Pointers, Sizes, Types, PartialStruct, 8489 /*IsFirstComponentList=*/true, C->isImplicit()); 8490 assert(!PartialStruct.Base.isValid() && 8491 "No partial structs for declare target link expected."); 8492 } 8493 } 8494 } 8495 8496 /// Generate the default map information for a given capture \a CI, 8497 /// record field declaration \a RI and captured value \a CV. 8498 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8499 const FieldDecl &RI, llvm::Value *CV, 8500 MapBaseValuesArrayTy &CurBasePointers, 8501 MapValuesArrayTy &CurPointers, 8502 MapValuesArrayTy &CurSizes, 8503 MapFlagsArrayTy &CurMapTypes) const { 8504 bool IsImplicit = true; 8505 // Do the default mapping. 
8506 if (CI.capturesThis()) { 8507 CurBasePointers.push_back(CV); 8508 CurPointers.push_back(CV); 8509 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8510 CurSizes.push_back( 8511 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8512 CGF.Int64Ty, /*isSigned=*/true)); 8513 // Default map type. 8514 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8515 } else if (CI.capturesVariableByCopy()) { 8516 CurBasePointers.push_back(CV); 8517 CurPointers.push_back(CV); 8518 if (!RI.getType()->isAnyPointerType()) { 8519 // We have to signal to the runtime captures passed by value that are 8520 // not pointers. 8521 CurMapTypes.push_back(OMP_MAP_LITERAL); 8522 CurSizes.push_back(CGF.Builder.CreateIntCast( 8523 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8524 } else { 8525 // Pointers are implicitly mapped with a zero size and no flags 8526 // (other than first map that is added for all implicit maps). 8527 CurMapTypes.push_back(OMP_MAP_NONE); 8528 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8529 } 8530 const VarDecl *VD = CI.getCapturedVar(); 8531 auto I = FirstPrivateDecls.find(VD); 8532 if (I != FirstPrivateDecls.end()) 8533 IsImplicit = I->getSecond(); 8534 } else { 8535 assert(CI.capturesVariable() && "Expected captured reference."); 8536 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8537 QualType ElementType = PtrTy->getPointeeType(); 8538 CurSizes.push_back(CGF.Builder.CreateIntCast( 8539 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8540 // The default map type for a scalar/complex type is 'to' because by 8541 // default the value doesn't have to be retrieved. For an aggregate 8542 // type, the default is 'tofrom'. 
8543 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8544 const VarDecl *VD = CI.getCapturedVar(); 8545 auto I = FirstPrivateDecls.find(VD); 8546 if (I != FirstPrivateDecls.end() && 8547 VD->getType().isConstant(CGF.getContext())) { 8548 llvm::Constant *Addr = 8549 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8550 // Copy the value of the original variable to the new global copy. 8551 CGF.Builder.CreateMemCpy( 8552 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), 8553 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8554 CurSizes.back(), /*IsVolatile=*/false); 8555 // Use new global variable as the base pointers. 8556 CurBasePointers.push_back(Addr); 8557 CurPointers.push_back(Addr); 8558 } else { 8559 CurBasePointers.push_back(CV); 8560 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8561 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8562 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8563 AlignmentSource::Decl)); 8564 CurPointers.push_back(PtrAddr.getPointer()); 8565 } else { 8566 CurPointers.push_back(CV); 8567 } 8568 } 8569 if (I != FirstPrivateDecls.end()) 8570 IsImplicit = I->getSecond(); 8571 } 8572 // Every default map produces a single argument which is a target parameter. 8573 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8574 8575 // Add flag stating this is an implicit map. 8576 if (IsImplicit) 8577 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8578 } 8579 }; 8580 } // anonymous namespace 8581 8582 /// Emit the arrays used to pass the captures and map information to the 8583 /// offloading runtime library. If there is no map or capture information, 8584 /// return nullptr by reference. 
8585 static void 8586 emitOffloadingArrays(CodeGenFunction &CGF, 8587 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8588 MappableExprsHandler::MapValuesArrayTy &Pointers, 8589 MappableExprsHandler::MapValuesArrayTy &Sizes, 8590 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8591 CGOpenMPRuntime::TargetDataInfo &Info) { 8592 CodeGenModule &CGM = CGF.CGM; 8593 ASTContext &Ctx = CGF.getContext(); 8594 8595 // Reset the array information. 8596 Info.clearArrayInfo(); 8597 Info.NumberOfPtrs = BasePointers.size(); 8598 8599 if (Info.NumberOfPtrs) { 8600 // Detect if we have any capture size requiring runtime evaluation of the 8601 // size so that a constant array could be eventually used. 8602 bool hasRuntimeEvaluationCaptureSize = false; 8603 for (llvm::Value *S : Sizes) 8604 if (!isa<llvm::Constant>(S)) { 8605 hasRuntimeEvaluationCaptureSize = true; 8606 break; 8607 } 8608 8609 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8610 QualType PointerArrayType = 8611 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 8612 /*IndexTypeQuals=*/0); 8613 8614 Info.BasePointersArray = 8615 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8616 Info.PointersArray = 8617 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8618 8619 // If we don't have any VLA types or other types that require runtime 8620 // evaluation, we can use a constant array for the map sizes, otherwise we 8621 // need to fill up the arrays as we do for the pointers. 
8622 QualType Int64Ty = 8623 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8624 if (hasRuntimeEvaluationCaptureSize) { 8625 QualType SizeArrayType = 8626 Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal, 8627 /*IndexTypeQuals=*/0); 8628 Info.SizesArray = 8629 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8630 } else { 8631 // We expect all the sizes to be constant, so we collect them to create 8632 // a constant array. 8633 SmallVector<llvm::Constant *, 16> ConstSizes; 8634 for (llvm::Value *S : Sizes) 8635 ConstSizes.push_back(cast<llvm::Constant>(S)); 8636 8637 auto *SizesArrayInit = llvm::ConstantArray::get( 8638 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8639 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8640 auto *SizesArrayGbl = new llvm::GlobalVariable( 8641 CGM.getModule(), SizesArrayInit->getType(), 8642 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8643 SizesArrayInit, Name); 8644 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8645 Info.SizesArray = SizesArrayGbl; 8646 } 8647 8648 // The map types are always constant so we don't need to generate code to 8649 // fill arrays. Instead, we create an array constant. 
8650 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8651 llvm::copy(MapTypes, Mapping.begin()); 8652 llvm::Constant *MapTypesArrayInit = 8653 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8654 std::string MaptypesName = 8655 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8656 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8657 CGM.getModule(), MapTypesArrayInit->getType(), 8658 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8659 MapTypesArrayInit, MaptypesName); 8660 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8661 Info.MapTypesArray = MapTypesArrayGbl; 8662 8663 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8664 llvm::Value *BPVal = *BasePointers[I]; 8665 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8666 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8667 Info.BasePointersArray, 0, I); 8668 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8669 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8670 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8671 CGF.Builder.CreateStore(BPVal, BPAddr); 8672 8673 if (Info.requiresDevicePointerInfo()) 8674 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8675 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8676 8677 llvm::Value *PVal = Pointers[I]; 8678 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8679 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8680 Info.PointersArray, 0, I); 8681 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8682 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8683 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8684 CGF.Builder.CreateStore(PVal, PAddr); 8685 8686 if (hasRuntimeEvaluationCaptureSize) { 8687 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8688 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8689 Info.SizesArray, 8690 /*Idx0=*/0, 8691 /*Idx1=*/I); 8692 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8693 CGF.Builder.CreateStore( 8694 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8695 SAddr); 8696 } 8697 } 8698 } 8699 } 8700 8701 /// Emit the arguments to be passed to the runtime library based on the 8702 /// arrays of pointers, sizes and map types. 8703 static void emitOffloadingArraysArgument( 8704 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8705 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8706 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8707 CodeGenModule &CGM = CGF.CGM; 8708 if (Info.NumberOfPtrs) { 8709 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8710 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8711 Info.BasePointersArray, 8712 /*Idx0=*/0, /*Idx1=*/0); 8713 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8714 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8715 Info.PointersArray, 8716 /*Idx0=*/0, 8717 /*Idx1=*/0); 8718 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8719 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8720 /*Idx0=*/0, /*Idx1=*/0); 8721 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8722 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8723 Info.MapTypesArray, 8724 /*Idx0=*/0, 8725 /*Idx1=*/0); 8726 } else { 8727 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8728 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8729 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8730 MapTypesArrayArg = 8731 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8732 } 8733 } 8734 8735 /// Check for inner distribute directive. 
8736 static const OMPExecutableDirective * 8737 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8738 const auto *CS = D.getInnermostCapturedStmt(); 8739 const auto *Body = 8740 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8741 const Stmt *ChildStmt = 8742 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8743 8744 if (const auto *NestedDir = 8745 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8746 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8747 switch (D.getDirectiveKind()) { 8748 case OMPD_target: 8749 if (isOpenMPDistributeDirective(DKind)) 8750 return NestedDir; 8751 if (DKind == OMPD_teams) { 8752 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8753 /*IgnoreCaptured=*/true); 8754 if (!Body) 8755 return nullptr; 8756 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8757 if (const auto *NND = 8758 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8759 DKind = NND->getDirectiveKind(); 8760 if (isOpenMPDistributeDirective(DKind)) 8761 return NND; 8762 } 8763 } 8764 return nullptr; 8765 case OMPD_target_teams: 8766 if (isOpenMPDistributeDirective(DKind)) 8767 return NestedDir; 8768 return nullptr; 8769 case OMPD_target_parallel: 8770 case OMPD_target_simd: 8771 case OMPD_target_parallel_for: 8772 case OMPD_target_parallel_for_simd: 8773 return nullptr; 8774 case OMPD_target_teams_distribute: 8775 case OMPD_target_teams_distribute_simd: 8776 case OMPD_target_teams_distribute_parallel_for: 8777 case OMPD_target_teams_distribute_parallel_for_simd: 8778 case OMPD_parallel: 8779 case OMPD_for: 8780 case OMPD_parallel_for: 8781 case OMPD_parallel_sections: 8782 case OMPD_for_simd: 8783 case OMPD_parallel_for_simd: 8784 case OMPD_cancel: 8785 case OMPD_cancellation_point: 8786 case OMPD_ordered: 8787 case OMPD_threadprivate: 8788 case OMPD_allocate: 8789 case OMPD_task: 8790 case OMPD_simd: 8791 case OMPD_sections: 8792 case OMPD_section: 8793 case 
OMPD_single: 8794 case OMPD_master: 8795 case OMPD_critical: 8796 case OMPD_taskyield: 8797 case OMPD_barrier: 8798 case OMPD_taskwait: 8799 case OMPD_taskgroup: 8800 case OMPD_atomic: 8801 case OMPD_flush: 8802 case OMPD_teams: 8803 case OMPD_target_data: 8804 case OMPD_target_exit_data: 8805 case OMPD_target_enter_data: 8806 case OMPD_distribute: 8807 case OMPD_distribute_simd: 8808 case OMPD_distribute_parallel_for: 8809 case OMPD_distribute_parallel_for_simd: 8810 case OMPD_teams_distribute: 8811 case OMPD_teams_distribute_simd: 8812 case OMPD_teams_distribute_parallel_for: 8813 case OMPD_teams_distribute_parallel_for_simd: 8814 case OMPD_target_update: 8815 case OMPD_declare_simd: 8816 case OMPD_declare_target: 8817 case OMPD_end_declare_target: 8818 case OMPD_declare_reduction: 8819 case OMPD_declare_mapper: 8820 case OMPD_taskloop: 8821 case OMPD_taskloop_simd: 8822 case OMPD_requires: 8823 case OMPD_unknown: 8824 llvm_unreachable("Unexpected directive."); 8825 } 8826 } 8827 8828 return nullptr; 8829 } 8830 8831 /// Emit the user-defined mapper function. The code generation follows the 8832 /// pattern in the example below. 8833 /// \code 8834 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8835 /// void *base, void *begin, 8836 /// int64_t size, int64_t type) { 8837 /// // Allocate space for an array section first. 8838 /// if (size > 1 && !maptype.IsDelete) 8839 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8840 /// size*sizeof(Ty), clearToFrom(type)); 8841 /// // Map members. 
8842 /// for (unsigned i = 0; i < size; i++) { 8843 /// // For each component specified by this mapper: 8844 /// for (auto c : all_components) { 8845 /// if (c.hasMapper()) 8846 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8847 /// c.arg_type); 8848 /// else 8849 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8850 /// c.arg_begin, c.arg_size, c.arg_type); 8851 /// } 8852 /// } 8853 /// // Delete the array section. 8854 /// if (size > 1 && maptype.IsDelete) 8855 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8856 /// size*sizeof(Ty), clearToFrom(type)); 8857 /// } 8858 /// \endcode 8859 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8860 CodeGenFunction *CGF) { 8861 if (UDMMap.count(D) > 0) 8862 return; 8863 ASTContext &C = CGM.getContext(); 8864 QualType Ty = D->getType(); 8865 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8866 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8867 auto *MapperVarDecl = 8868 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8869 SourceLocation Loc = D->getLocation(); 8870 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8871 8872 // Prepare mapper function arguments and attributes. 
8873 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8874 C.VoidPtrTy, ImplicitParamDecl::Other); 8875 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 8876 ImplicitParamDecl::Other); 8877 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8878 C.VoidPtrTy, ImplicitParamDecl::Other); 8879 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8880 ImplicitParamDecl::Other); 8881 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8882 ImplicitParamDecl::Other); 8883 FunctionArgList Args; 8884 Args.push_back(&HandleArg); 8885 Args.push_back(&BaseArg); 8886 Args.push_back(&BeginArg); 8887 Args.push_back(&SizeArg); 8888 Args.push_back(&TypeArg); 8889 const CGFunctionInfo &FnInfo = 8890 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 8891 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 8892 SmallString<64> TyStr; 8893 llvm::raw_svector_ostream Out(TyStr); 8894 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 8895 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 8896 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 8897 Name, &CGM.getModule()); 8898 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 8899 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 8900 // Start the mapper function code generation. 8901 CodeGenFunction MapperCGF(CGM); 8902 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 8903 // Compute the starting and end addreses of array elements. 
8904 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 8905 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 8906 C.getPointerType(Int64Ty), Loc); 8907 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 8908 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 8909 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 8910 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 8911 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 8912 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 8913 C.getPointerType(Int64Ty), Loc); 8914 // Prepare common arguments for array initiation and deletion. 8915 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 8916 MapperCGF.GetAddrOfLocalVar(&HandleArg), 8917 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8918 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 8919 MapperCGF.GetAddrOfLocalVar(&BaseArg), 8920 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8921 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 8922 MapperCGF.GetAddrOfLocalVar(&BeginArg), 8923 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8924 8925 // Emit array initiation if this is an array section and \p MapType indicates 8926 // that memory allocation is required. 8927 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 8928 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 8929 ElementSize, HeadBB, /*IsInit=*/true); 8930 8931 // Emit a for loop to iterate through SizeArg of elements and map all of them. 8932 8933 // Emit the loop header block. 8934 MapperCGF.EmitBlock(HeadBB); 8935 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 8936 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 8937 // Evaluate whether the initial condition is satisfied. 
8938 llvm::Value *IsEmpty = 8939 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 8940 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 8941 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 8942 8943 // Emit the loop body block. 8944 MapperCGF.EmitBlock(BodyBB); 8945 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 8946 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 8947 PtrPHI->addIncoming(PtrBegin, EntryBB); 8948 Address PtrCurrent = 8949 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 8950 .getAlignment() 8951 .alignmentOfArrayElement(ElementSize)); 8952 // Privatize the declared variable of mapper to be the current array element. 8953 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 8954 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 8955 return MapperCGF 8956 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 8957 .getAddress(); 8958 }); 8959 (void)Scope.Privatize(); 8960 8961 // Get map clause information. Fill up the arrays with all mapped variables. 8962 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8963 MappableExprsHandler::MapValuesArrayTy Pointers; 8964 MappableExprsHandler::MapValuesArrayTy Sizes; 8965 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8966 MappableExprsHandler MEHandler(*D, MapperCGF); 8967 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 8968 8969 // Call the runtime API __tgt_mapper_num_components to get the number of 8970 // pre-existing components. 8971 llvm::Value *OffloadingArgs[] = {Handle}; 8972 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 8973 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 8974 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 8975 PreviousSize, 8976 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 8977 8978 // Fill up the runtime mapper handle for all components. 
8979 for (unsigned I = 0; I < BasePointers.size(); ++I) { 8980 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 8981 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8982 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 8983 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8984 llvm::Value *CurSizeArg = Sizes[I]; 8985 8986 // Extract the MEMBER_OF field from the map type. 8987 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 8988 MapperCGF.EmitBlock(MemberBB); 8989 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 8990 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 8991 OriMapType, 8992 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 8993 llvm::BasicBlock *MemberCombineBB = 8994 MapperCGF.createBasicBlock("omp.member.combine"); 8995 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 8996 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 8997 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 8998 // Add the number of pre-existing components to the MEMBER_OF field if it 8999 // is valid. 9000 MapperCGF.EmitBlock(MemberCombineBB); 9001 llvm::Value *CombinedMember = 9002 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9003 // Do nothing if it is not a member of previous components. 9004 MapperCGF.EmitBlock(TypeBB); 9005 llvm::PHINode *MemberMapType = 9006 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9007 MemberMapType->addIncoming(OriMapType, MemberBB); 9008 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9009 9010 // Combine the map type inherited from user-defined mapper with that 9011 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9012 // bits of the \a MapType, which is the input argument of the mapper 9013 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9014 // bits of MemberMapType. 
9015 // [OpenMP 5.0], 1.2.6. map-type decay. 9016 // | alloc | to | from | tofrom | release | delete 9017 // ---------------------------------------------------------- 9018 // alloc | alloc | alloc | alloc | alloc | release | delete 9019 // to | alloc | to | alloc | to | release | delete 9020 // from | alloc | alloc | from | from | release | delete 9021 // tofrom | alloc | to | from | tofrom | release | delete 9022 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9023 MapType, 9024 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9025 MappableExprsHandler::OMP_MAP_FROM)); 9026 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9027 llvm::BasicBlock *AllocElseBB = 9028 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9029 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9030 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9031 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9032 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9033 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9034 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9035 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9036 MapperCGF.EmitBlock(AllocBB); 9037 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9038 MemberMapType, 9039 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9040 MappableExprsHandler::OMP_MAP_FROM))); 9041 MapperCGF.Builder.CreateBr(EndBB); 9042 MapperCGF.EmitBlock(AllocElseBB); 9043 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9044 LeftToFrom, 9045 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9046 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9047 // In case of to, clear OMP_MAP_FROM. 
9048 MapperCGF.EmitBlock(ToBB); 9049 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9050 MemberMapType, 9051 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9052 MapperCGF.Builder.CreateBr(EndBB); 9053 MapperCGF.EmitBlock(ToElseBB); 9054 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9055 LeftToFrom, 9056 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9057 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9058 // In case of from, clear OMP_MAP_TO. 9059 MapperCGF.EmitBlock(FromBB); 9060 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9061 MemberMapType, 9062 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9063 // In case of tofrom, do nothing. 9064 MapperCGF.EmitBlock(EndBB); 9065 llvm::PHINode *CurMapType = 9066 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9067 CurMapType->addIncoming(AllocMapType, AllocBB); 9068 CurMapType->addIncoming(ToMapType, ToBB); 9069 CurMapType->addIncoming(FromMapType, FromBB); 9070 CurMapType->addIncoming(MemberMapType, ToElseBB); 9071 9072 // TODO: call the corresponding mapper function if a user-defined mapper is 9073 // associated with this map clause. 9074 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9075 // data structure. 9076 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9077 CurSizeArg, CurMapType}; 9078 MapperCGF.EmitRuntimeCall( 9079 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9080 OffloadingArgs); 9081 } 9082 9083 // Update the pointer to point to the next element that needs to be mapped, 9084 // and check whether we have mapped all elements. 
9085 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9086 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9087 PtrPHI->addIncoming(PtrNext, BodyBB); 9088 llvm::Value *IsDone = 9089 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9090 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9091 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9092 9093 MapperCGF.EmitBlock(ExitBB); 9094 // Emit array deletion if this is an array section and \p MapType indicates 9095 // that deletion is required. 9096 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9097 ElementSize, DoneBB, /*IsInit=*/false); 9098 9099 // Emit the function exit block. 9100 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9101 MapperCGF.FinishFunction(); 9102 UDMMap.try_emplace(D, Fn); 9103 if (CGF) { 9104 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9105 Decls.second.push_back(D); 9106 } 9107 } 9108 9109 /// Emit the array initialization or deletion portion for user-defined mapper 9110 /// code generation. First, it evaluates whether an array section is mapped and 9111 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9112 /// true, and \a MapType indicates to not delete this array, array 9113 /// initialization code is generated. If \a IsInit is false, and \a MapType 9114 /// indicates to not this array, array deletion code is generated. 9115 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9116 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9117 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9118 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9119 StringRef Prefix = IsInit ? ".init" : ".del"; 9120 9121 // Evaluate if this is an array section. 
9122 llvm::BasicBlock *IsDeleteBB = 9123 MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); 9124 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); 9125 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9126 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9127 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9128 9129 // Evaluate if we are going to delete this section. 9130 MapperCGF.EmitBlock(IsDeleteBB); 9131 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9132 MapType, 9133 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9134 llvm::Value *DeleteCond; 9135 if (IsInit) { 9136 DeleteCond = MapperCGF.Builder.CreateIsNull( 9137 DeleteBit, "omp.array" + Prefix + ".delete"); 9138 } else { 9139 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9140 DeleteBit, "omp.array" + Prefix + ".delete"); 9141 } 9142 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9143 9144 MapperCGF.EmitBlock(BodyBB); 9145 // Get the array size by multiplying element size and element number (i.e., \p 9146 // Size). 9147 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9148 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9149 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9150 // memory allocation/deletion purpose only. 9151 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9152 MapType, 9153 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9154 MappableExprsHandler::OMP_MAP_FROM))); 9155 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9156 // data structure. 
9157 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9158 MapperCGF.EmitRuntimeCall( 9159 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9160 } 9161 9162 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9163 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, 9164 const llvm::function_ref<llvm::Value *( 9165 CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { 9166 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9167 const OMPExecutableDirective *TD = &D; 9168 // Get nested teams distribute kind directive, if any. 9169 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9170 TD = getNestedDistributeDirective(CGM.getContext(), D); 9171 if (!TD) 9172 return; 9173 const auto *LD = cast<OMPLoopDirective>(TD); 9174 auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, 9175 PrePostActionTy &) { 9176 llvm::Value *NumIterations = SizeEmitter(CGF, *LD); 9177 9178 // Emit device ID if any. 
9179 llvm::Value *DeviceID; 9180 if (Device) 9181 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9182 CGF.Int64Ty, /*isSigned=*/true); 9183 else 9184 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9185 9186 llvm::Value *Args[] = {DeviceID, NumIterations}; 9187 CGF.EmitRuntimeCall( 9188 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9189 }; 9190 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9191 } 9192 9193 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 9194 const OMPExecutableDirective &D, 9195 llvm::Function *OutlinedFn, 9196 llvm::Value *OutlinedFnID, 9197 const Expr *IfCond, const Expr *Device) { 9198 if (!CGF.HaveInsertPoint()) 9199 return; 9200 9201 assert(OutlinedFn && "Invalid outlined function!"); 9202 9203 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9204 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9205 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9206 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9207 PrePostActionTy &) { 9208 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9209 }; 9210 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9211 9212 CodeGenFunction::OMPTargetDataInfo InputInfo; 9213 llvm::Value *MapTypesArray = nullptr; 9214 // Fill up the pointer arrays and transfer execution to the device. 9215 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9216 &MapTypesArray, &CS, RequiresOuterTask, 9217 &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { 9218 // On top of the arrays that were filled up, the target offloading call 9219 // takes as arguments the device id as well as the host pointer. The host 9220 // pointer is used by the runtime library to identify the current target 9221 // region, so it only has to be unique and not necessarily point to 9222 // anything. 
It could be the pointer to the outlined function that 9223 // implements the target region, but we aren't using that so that the 9224 // compiler doesn't need to keep that, and could therefore inline the host 9225 // function if proven worthwhile during optimization. 9226 9227 // From this point on, we need to have an ID of the target region defined. 9228 assert(OutlinedFnID && "Invalid outlined function ID!"); 9229 9230 // Emit device ID if any. 9231 llvm::Value *DeviceID; 9232 if (Device) { 9233 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9234 CGF.Int64Ty, /*isSigned=*/true); 9235 } else { 9236 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9237 } 9238 9239 // Emit the number of elements in the offloading arrays. 9240 llvm::Value *PointerNum = 9241 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9242 9243 // Return value of the runtime offloading call. 9244 llvm::Value *Return; 9245 9246 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9247 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9248 9249 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9250 // The target region is an outlined function launched by the runtime 9251 // via calls __tgt_target() or __tgt_target_teams(). 9252 // 9253 // __tgt_target() launches a target region with one team and one thread, 9254 // executing a serial region. This master thread may in turn launch 9255 // more threads within its team upon encountering a parallel region, 9256 // however, no additional teams can be launched on the device. 9257 // 9258 // __tgt_target_teams() launches a target region with one or more teams, 9259 // each with one or more threads. This call is required for target 9260 // constructs such as: 9261 // 'target teams' 9262 // 'target' / 'teams' 9263 // 'target teams distribute parallel for' 9264 // 'target parallel' 9265 // and so on. 
9266 // 9267 // Note that on the host and CPU targets, the runtime implementation of 9268 // these calls simply call the outlined function without forking threads. 9269 // The outlined functions themselves have runtime calls to 9270 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9271 // the compiler in emitTeamsCall() and emitParallelCall(). 9272 // 9273 // In contrast, on the NVPTX target, the implementation of 9274 // __tgt_target_teams() launches a GPU kernel with the requested number 9275 // of teams and threads so no additional calls to the runtime are required. 9276 if (NumTeams) { 9277 // If we have NumTeams defined this means that we have an enclosed teams 9278 // region. Therefore we also expect to have NumThreads defined. These two 9279 // values should be defined in the presence of a teams directive, 9280 // regardless of having any clauses associated. If the user is using teams 9281 // but no clauses, these two values will be the default that should be 9282 // passed to the runtime library - a 32-bit integer with the value zero. 9283 assert(NumThreads && "Thread limit expression should be available along " 9284 "with number of teams."); 9285 llvm::Value *OffloadingArgs[] = {DeviceID, 9286 OutlinedFnID, 9287 PointerNum, 9288 InputInfo.BasePointersArray.getPointer(), 9289 InputInfo.PointersArray.getPointer(), 9290 InputInfo.SizesArray.getPointer(), 9291 MapTypesArray, 9292 NumTeams, 9293 NumThreads}; 9294 Return = CGF.EmitRuntimeCall( 9295 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 9296 : OMPRTL__tgt_target_teams), 9297 OffloadingArgs); 9298 } else { 9299 llvm::Value *OffloadingArgs[] = {DeviceID, 9300 OutlinedFnID, 9301 PointerNum, 9302 InputInfo.BasePointersArray.getPointer(), 9303 InputInfo.PointersArray.getPointer(), 9304 InputInfo.SizesArray.getPointer(), 9305 MapTypesArray}; 9306 Return = CGF.EmitRuntimeCall( 9307 createRuntimeFunction(HasNowait ? 
OMPRTL__tgt_target_nowait 9308 : OMPRTL__tgt_target), 9309 OffloadingArgs); 9310 } 9311 9312 // Check the error code and execute the host version if required. 9313 llvm::BasicBlock *OffloadFailedBlock = 9314 CGF.createBasicBlock("omp_offload.failed"); 9315 llvm::BasicBlock *OffloadContBlock = 9316 CGF.createBasicBlock("omp_offload.cont"); 9317 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9318 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9319 9320 CGF.EmitBlock(OffloadFailedBlock); 9321 if (RequiresOuterTask) { 9322 CapturedVars.clear(); 9323 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9324 } 9325 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9326 CGF.EmitBranch(OffloadContBlock); 9327 9328 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9329 }; 9330 9331 // Notify that the host version must be executed. 9332 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9333 RequiresOuterTask](CodeGenFunction &CGF, 9334 PrePostActionTy &) { 9335 if (RequiresOuterTask) { 9336 CapturedVars.clear(); 9337 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9338 } 9339 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9340 }; 9341 9342 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9343 &CapturedVars, RequiresOuterTask, 9344 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9345 // Fill up the arrays with all the captured variables. 9346 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9347 MappableExprsHandler::MapValuesArrayTy Pointers; 9348 MappableExprsHandler::MapValuesArrayTy Sizes; 9349 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9350 9351 // Get mappable expression information. 
9352 MappableExprsHandler MEHandler(D, CGF); 9353 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9354 9355 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9356 auto CV = CapturedVars.begin(); 9357 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9358 CE = CS.capture_end(); 9359 CI != CE; ++CI, ++RI, ++CV) { 9360 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 9361 MappableExprsHandler::MapValuesArrayTy CurPointers; 9362 MappableExprsHandler::MapValuesArrayTy CurSizes; 9363 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 9364 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9365 9366 // VLA sizes are passed to the outlined region by copy and do not have map 9367 // information associated. 9368 if (CI->capturesVariableArrayType()) { 9369 CurBasePointers.push_back(*CV); 9370 CurPointers.push_back(*CV); 9371 CurSizes.push_back(CGF.Builder.CreateIntCast( 9372 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9373 // Copy to the device as an argument. No need to retrieve it. 9374 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9375 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9376 MappableExprsHandler::OMP_MAP_IMPLICIT); 9377 } else { 9378 // If we have any information in the map clause, we use it, otherwise we 9379 // just do a default mapping. 9380 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 9381 CurSizes, CurMapTypes, PartialStruct); 9382 if (CurBasePointers.empty()) 9383 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 9384 CurPointers, CurSizes, CurMapTypes); 9385 // Generate correct mapping for variables captured by reference in 9386 // lambdas. 9387 if (CI->capturesVariable()) 9388 MEHandler.generateInfoForLambdaCaptures( 9389 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 9390 CurMapTypes, LambdaPointers); 9391 } 9392 // We expect to have at least an element of information for this capture. 
9393 assert(!CurBasePointers.empty() && 9394 "Non-existing map pointer for capture!"); 9395 assert(CurBasePointers.size() == CurPointers.size() && 9396 CurBasePointers.size() == CurSizes.size() && 9397 CurBasePointers.size() == CurMapTypes.size() && 9398 "Inconsistent map information sizes!"); 9399 9400 // If there is an entry in PartialStruct it means we have a struct with 9401 // individual members mapped. Emit an extra combined entry. 9402 if (PartialStruct.Base.isValid()) 9403 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 9404 CurMapTypes, PartialStruct); 9405 9406 // We need to append the results of this capture to what we already have. 9407 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 9408 Pointers.append(CurPointers.begin(), CurPointers.end()); 9409 Sizes.append(CurSizes.begin(), CurSizes.end()); 9410 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 9411 } 9412 // Adjust MEMBER_OF flags for the lambdas captures. 9413 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 9414 Pointers, MapTypes); 9415 // Map other list items in the map clause which are not captured variables 9416 // but "declare target link" global variables. 9417 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 9418 MapTypes); 9419 9420 TargetDataInfo Info; 9421 // Fill up the arrays and create the arguments. 
9422 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9423 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9424 Info.PointersArray, Info.SizesArray, 9425 Info.MapTypesArray, Info); 9426 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9427 InputInfo.BasePointersArray = 9428 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9429 InputInfo.PointersArray = 9430 Address(Info.PointersArray, CGM.getPointerAlign()); 9431 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9432 MapTypesArray = Info.MapTypesArray; 9433 if (RequiresOuterTask) 9434 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9435 else 9436 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9437 }; 9438 9439 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9440 CodeGenFunction &CGF, PrePostActionTy &) { 9441 if (RequiresOuterTask) { 9442 CodeGenFunction::OMPTargetDataInfo InputInfo; 9443 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9444 } else { 9445 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9446 } 9447 }; 9448 9449 // If we have a target function ID it means that we need to support 9450 // offloading, otherwise, just execute on the host. We need to execute on host 9451 // regardless of the conditional in the if clause if, e.g., the user do not 9452 // specify target triples. 9453 if (OutlinedFnID) { 9454 if (IfCond) { 9455 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9456 } else { 9457 RegionCodeGenTy ThenRCG(TargetThenGen); 9458 ThenRCG(CGF); 9459 } 9460 } else { 9461 RegionCodeGenTy ElseRCG(TargetElseGen); 9462 ElseRCG(CGF); 9463 } 9464 } 9465 9466 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9467 StringRef ParentName) { 9468 if (!S) 9469 return; 9470 9471 // Codegen OMP target directives that offload compute to the device. 
9472 bool RequiresDeviceCodegen = 9473 isa<OMPExecutableDirective>(S) && 9474 isOpenMPTargetExecutionDirective( 9475 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9476 9477 if (RequiresDeviceCodegen) { 9478 const auto &E = *cast<OMPExecutableDirective>(S); 9479 unsigned DeviceID; 9480 unsigned FileID; 9481 unsigned Line; 9482 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9483 FileID, Line); 9484 9485 // Is this a target region that should not be emitted as an entry point? If 9486 // so just signal we are done with this target region. 9487 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9488 ParentName, Line)) 9489 return; 9490 9491 switch (E.getDirectiveKind()) { 9492 case OMPD_target: 9493 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9494 cast<OMPTargetDirective>(E)); 9495 break; 9496 case OMPD_target_parallel: 9497 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9498 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9499 break; 9500 case OMPD_target_teams: 9501 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9502 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9503 break; 9504 case OMPD_target_teams_distribute: 9505 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9506 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9507 break; 9508 case OMPD_target_teams_distribute_simd: 9509 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9510 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9511 break; 9512 case OMPD_target_parallel_for: 9513 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9514 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9515 break; 9516 case OMPD_target_parallel_for_simd: 9517 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9518 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9519 break; 9520 case OMPD_target_simd: 9521 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9522 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9523 break; 9524 case OMPD_target_teams_distribute_parallel_for: 9525 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9526 CGM, ParentName, 9527 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9528 break; 9529 case OMPD_target_teams_distribute_parallel_for_simd: 9530 CodeGenFunction:: 9531 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9532 CGM, ParentName, 9533 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9534 break; 9535 case OMPD_parallel: 9536 case OMPD_for: 9537 case OMPD_parallel_for: 9538 case OMPD_parallel_sections: 9539 case OMPD_for_simd: 9540 case OMPD_parallel_for_simd: 9541 case OMPD_cancel: 9542 case OMPD_cancellation_point: 9543 case OMPD_ordered: 9544 case OMPD_threadprivate: 9545 case OMPD_allocate: 9546 case OMPD_task: 9547 case OMPD_simd: 9548 case OMPD_sections: 9549 case OMPD_section: 9550 case OMPD_single: 9551 case OMPD_master: 9552 case OMPD_critical: 9553 case OMPD_taskyield: 9554 case OMPD_barrier: 9555 case OMPD_taskwait: 9556 case OMPD_taskgroup: 9557 case OMPD_atomic: 9558 case OMPD_flush: 9559 case OMPD_teams: 9560 case OMPD_target_data: 9561 case OMPD_target_exit_data: 9562 case OMPD_target_enter_data: 9563 case OMPD_distribute: 9564 case OMPD_distribute_simd: 9565 case OMPD_distribute_parallel_for: 9566 case OMPD_distribute_parallel_for_simd: 9567 case OMPD_teams_distribute: 9568 case OMPD_teams_distribute_simd: 9569 case OMPD_teams_distribute_parallel_for: 9570 case OMPD_teams_distribute_parallel_for_simd: 9571 case OMPD_target_update: 9572 case OMPD_declare_simd: 9573 case OMPD_declare_target: 9574 case OMPD_end_declare_target: 9575 case OMPD_declare_reduction: 9576 case OMPD_declare_mapper: 9577 case OMPD_taskloop: 9578 case OMPD_taskloop_simd: 9579 case OMPD_requires: 9580 case OMPD_unknown: 9581 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 
9582 } 9583 return; 9584 } 9585 9586 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9587 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9588 return; 9589 9590 scanForTargetRegionsFunctions( 9591 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9592 return; 9593 } 9594 9595 // If this is a lambda function, look into its body. 9596 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9597 S = L->getBody(); 9598 9599 // Keep looking for target regions recursively. 9600 for (const Stmt *II : S->children()) 9601 scanForTargetRegionsFunctions(II, ParentName); 9602 } 9603 9604 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9605 // If emitting code for the host, we do not process FD here. Instead we do 9606 // the normal code generation. 9607 if (!CGM.getLangOpts().OpenMPIsDevice) 9608 return false; 9609 9610 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9611 StringRef Name = CGM.getMangledName(GD); 9612 // Try to detect target regions in the function. 9613 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) 9614 scanForTargetRegionsFunctions(FD->getBody(), Name); 9615 9616 // Do not to emit function if it is not marked as declare target. 9617 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9618 AlreadyEmittedTargetFunctions.count(Name) == 0; 9619 } 9620 9621 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9622 if (!CGM.getLangOpts().OpenMPIsDevice) 9623 return false; 9624 9625 // Check if there are Ctors/Dtors in this declaration and look for target 9626 // regions in it. We use the complete variant to produce the kernel name 9627 // mangling. 
9628 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9629 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9630 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9631 StringRef ParentName = 9632 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9633 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9634 } 9635 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9636 StringRef ParentName = 9637 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9638 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9639 } 9640 } 9641 9642 // Do not to emit variable if it is not marked as declare target. 9643 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9644 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9645 cast<VarDecl>(GD.getDecl())); 9646 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9647 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9648 HasRequiresUnifiedSharedMemory)) { 9649 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9650 return true; 9651 } 9652 return false; 9653 } 9654 9655 llvm::Constant * 9656 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9657 const VarDecl *VD) { 9658 assert(VD->getType().isConstant(CGM.getContext()) && 9659 "Expected constant variable."); 9660 StringRef VarName; 9661 llvm::Constant *Addr; 9662 llvm::GlobalValue::LinkageTypes Linkage; 9663 QualType Ty = VD->getType(); 9664 SmallString<128> Buffer; 9665 { 9666 unsigned DeviceID; 9667 unsigned FileID; 9668 unsigned Line; 9669 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9670 FileID, Line); 9671 llvm::raw_svector_ostream OS(Buffer); 9672 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9673 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9674 VarName = OS.str(); 9675 } 9676 Linkage = llvm::GlobalValue::InternalLinkage; 9677 Addr = 9678 
getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9679 getDefaultFirstprivateAddressSpace()); 9680 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9681 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9682 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9683 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9684 VarName, Addr, VarSize, 9685 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9686 return Addr; 9687 } 9688 9689 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9690 llvm::Constant *Addr) { 9691 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9692 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9693 if (!Res) { 9694 if (CGM.getLangOpts().OpenMPIsDevice) { 9695 // Register non-target variables being emitted in device code (debug info 9696 // may cause this). 9697 StringRef VarName = CGM.getMangledName(VD); 9698 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9699 } 9700 return; 9701 } 9702 // Register declare target variables. 9703 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9704 StringRef VarName; 9705 CharUnits VarSize; 9706 llvm::GlobalValue::LinkageTypes Linkage; 9707 9708 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9709 !HasRequiresUnifiedSharedMemory) { 9710 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9711 VarName = CGM.getMangledName(VD); 9712 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9713 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9714 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9715 } else { 9716 VarSize = CharUnits::Zero(); 9717 } 9718 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9719 // Temp solution to prevent optimizations of the internal variables. 
9720 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9721 std::string RefName = getName({VarName, "ref"}); 9722 if (!CGM.GetGlobalValue(RefName)) { 9723 llvm::Constant *AddrRef = 9724 getOrCreateInternalVariable(Addr->getType(), RefName); 9725 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9726 GVAddrRef->setConstant(/*Val=*/true); 9727 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9728 GVAddrRef->setInitializer(Addr); 9729 CGM.addCompilerUsedGlobal(GVAddrRef); 9730 } 9731 } 9732 } else { 9733 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9734 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9735 HasRequiresUnifiedSharedMemory)) && 9736 "Declare target attribute must link or to with unified memory."); 9737 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9738 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9739 else 9740 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9741 9742 if (CGM.getLangOpts().OpenMPIsDevice) { 9743 VarName = Addr->getName(); 9744 Addr = nullptr; 9745 } else { 9746 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9747 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9748 } 9749 VarSize = CGM.getPointerSize(); 9750 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9751 } 9752 9753 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9754 VarName, Addr, VarSize, Flags, Linkage); 9755 } 9756 9757 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9758 if (isa<FunctionDecl>(GD.getDecl()) || 9759 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9760 return emitTargetFunctions(GD); 9761 9762 return emitTargetGlobalVariable(GD); 9763 } 9764 9765 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9766 for (const VarDecl *VD : DeferredGlobalVariables) { 9767 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9768 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9769 if (!Res) 9770 continue; 9771 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9772 !HasRequiresUnifiedSharedMemory) { 9773 CGM.EmitGlobal(VD); 9774 } else { 9775 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9776 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9777 HasRequiresUnifiedSharedMemory)) && 9778 "Expected link clause or to clause with unified memory."); 9779 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9780 } 9781 } 9782 } 9783 9784 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9785 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9786 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9787 " Expected target-based directive."); 9788 } 9789 9790 void CGOpenMPRuntime::checkArchForUnifiedAddressing( 9791 const OMPRequiresDecl *D) { 9792 for (const OMPClause *Clause : D->clauselists()) { 9793 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9794 HasRequiresUnifiedSharedMemory = true; 9795 break; 9796 } 9797 } 9798 } 9799 9800 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9801 LangAS &AS) { 9802 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9803 return false; 9804 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9805 switch(A->getAllocatorType()) { 9806 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9807 // Not supported, fallback to the default mem space. 
9808 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9809 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9810 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9811 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9812 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9813 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9814 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9815 AS = LangAS::Default; 9816 return true; 9817 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9818 llvm_unreachable("Expected predefined allocator for the variables with the " 9819 "static storage."); 9820 } 9821 return false; 9822 } 9823 9824 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9825 return HasRequiresUnifiedSharedMemory; 9826 } 9827 9828 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9829 CodeGenModule &CGM) 9830 : CGM(CGM) { 9831 if (CGM.getLangOpts().OpenMPIsDevice) { 9832 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9833 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9834 } 9835 } 9836 9837 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9838 if (CGM.getLangOpts().OpenMPIsDevice) 9839 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9840 } 9841 9842 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9843 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9844 return true; 9845 9846 StringRef Name = CGM.getMangledName(GD); 9847 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9848 // Do not to emit function if it is marked as declare target as it was already 9849 // emitted. 
9850 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9851 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 9852 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 9853 return !F->isDeclaration(); 9854 return false; 9855 } 9856 return true; 9857 } 9858 9859 return !AlreadyEmittedTargetFunctions.insert(Name).second; 9860 } 9861 9862 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9863 // If we don't have entries or if we are emitting code for the device, we 9864 // don't need to do anything. 9865 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9866 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9867 (OffloadEntriesInfoManager.empty() && 9868 !HasEmittedDeclareTargetRegion && 9869 !HasEmittedTargetRegion)) 9870 return nullptr; 9871 9872 // Create and register the function that handles the requires directives. 9873 ASTContext &C = CGM.getContext(); 9874 9875 llvm::Function *RequiresRegFn; 9876 { 9877 CodeGenFunction CGF(CGM); 9878 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 9879 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 9880 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 9881 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 9882 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 9883 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 9884 // TODO: check for other requires clauses. 9885 // The requires directive takes effect only when a target region is 9886 // present in the compilation unit. Otherwise it is ignored and not 9887 // passed to the runtime. This avoids the runtime from throwing an error 9888 // for mismatching requires clauses across compilation units that don't 9889 // contain at least 1 target region. 
9890 assert((HasEmittedTargetRegion || 9891 HasEmittedDeclareTargetRegion || 9892 !OffloadEntriesInfoManager.empty()) && 9893 "Target or declare target region expected."); 9894 if (HasRequiresUnifiedSharedMemory) 9895 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 9896 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 9897 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 9898 CGF.FinishFunction(); 9899 } 9900 return RequiresRegFn; 9901 } 9902 9903 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 9904 // If we have offloading in the current module, we need to emit the entries 9905 // now and register the offloading descriptor. 9906 createOffloadEntriesAndInfoMetadata(); 9907 9908 // Create and register the offloading binary descriptors. This is the main 9909 // entity that captures all the information about offloading in the current 9910 // compilation unit. 9911 return createOffloadingBinaryDescriptorRegistration(); 9912 } 9913 9914 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9915 const OMPExecutableDirective &D, 9916 SourceLocation Loc, 9917 llvm::Function *OutlinedFn, 9918 ArrayRef<llvm::Value *> CapturedVars) { 9919 if (!CGF.HaveInsertPoint()) 9920 return; 9921 9922 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9923 CodeGenFunction::RunCleanupsScope Scope(CGF); 9924 9925 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9926 llvm::Value *Args[] = { 9927 RTLoc, 9928 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9929 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9930 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9931 RealArgs.append(std::begin(Args), std::end(Args)); 9932 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9933 9934 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9935 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9936 } 9937 9938 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9939 const Expr 
*NumTeams, 9940 const Expr *ThreadLimit, 9941 SourceLocation Loc) { 9942 if (!CGF.HaveInsertPoint()) 9943 return; 9944 9945 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9946 9947 llvm::Value *NumTeamsVal = 9948 NumTeams 9949 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9950 CGF.CGM.Int32Ty, /* isSigned = */ true) 9951 : CGF.Builder.getInt32(0); 9952 9953 llvm::Value *ThreadLimitVal = 9954 ThreadLimit 9955 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 9956 CGF.CGM.Int32Ty, /* isSigned = */ true) 9957 : CGF.Builder.getInt32(0); 9958 9959 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 9960 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 9961 ThreadLimitVal}; 9962 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 9963 PushNumTeamsArgs); 9964 } 9965 9966 void CGOpenMPRuntime::emitTargetDataCalls( 9967 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9968 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9969 if (!CGF.HaveInsertPoint()) 9970 return; 9971 9972 // Action used to replace the default codegen action and turn privatization 9973 // off. 9974 PrePostActionTy NoPrivAction; 9975 9976 // Generate the code for the opening of the data environment. Capture all the 9977 // arguments of the runtime call by reference because they are used in the 9978 // closing of the region. 9979 auto &&BeginThenGen = [this, &D, Device, &Info, 9980 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 9981 // Fill up the arrays with all the mapped variables. 9982 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9983 MappableExprsHandler::MapValuesArrayTy Pointers; 9984 MappableExprsHandler::MapValuesArrayTy Sizes; 9985 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9986 9987 // Get map clause information. 
9988 MappableExprsHandler MCHandler(D, CGF); 9989 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9990 9991 // Fill up the arrays and create the arguments. 9992 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9993 9994 llvm::Value *BasePointersArrayArg = nullptr; 9995 llvm::Value *PointersArrayArg = nullptr; 9996 llvm::Value *SizesArrayArg = nullptr; 9997 llvm::Value *MapTypesArrayArg = nullptr; 9998 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9999 SizesArrayArg, MapTypesArrayArg, Info); 10000 10001 // Emit device ID if any. 10002 llvm::Value *DeviceID = nullptr; 10003 if (Device) { 10004 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10005 CGF.Int64Ty, /*isSigned=*/true); 10006 } else { 10007 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10008 } 10009 10010 // Emit the number of elements in the offloading arrays. 10011 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10012 10013 llvm::Value *OffloadingArgs[] = { 10014 DeviceID, PointerNum, BasePointersArrayArg, 10015 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10016 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10017 OffloadingArgs); 10018 10019 // If device pointer privatization is required, emit the body of the region 10020 // here. It will have to be duplicated: with and without privatization. 10021 if (!Info.CaptureDeviceAddrMap.empty()) 10022 CodeGen(CGF); 10023 }; 10024 10025 // Generate code for the closing of the data region. 
10026 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10027 PrePostActionTy &) { 10028 assert(Info.isValid() && "Invalid data environment closing arguments."); 10029 10030 llvm::Value *BasePointersArrayArg = nullptr; 10031 llvm::Value *PointersArrayArg = nullptr; 10032 llvm::Value *SizesArrayArg = nullptr; 10033 llvm::Value *MapTypesArrayArg = nullptr; 10034 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10035 SizesArrayArg, MapTypesArrayArg, Info); 10036 10037 // Emit device ID if any. 10038 llvm::Value *DeviceID = nullptr; 10039 if (Device) { 10040 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10041 CGF.Int64Ty, /*isSigned=*/true); 10042 } else { 10043 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10044 } 10045 10046 // Emit the number of elements in the offloading arrays. 10047 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10048 10049 llvm::Value *OffloadingArgs[] = { 10050 DeviceID, PointerNum, BasePointersArrayArg, 10051 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10052 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10053 OffloadingArgs); 10054 }; 10055 10056 // If we need device pointer privatization, we need to emit the body of the 10057 // region with no privatization in the 'else' branch of the conditional. 10058 // Otherwise, we don't have to do anything. 10059 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10060 PrePostActionTy &) { 10061 if (!Info.CaptureDeviceAddrMap.empty()) { 10062 CodeGen.setAction(NoPrivAction); 10063 CodeGen(CGF); 10064 } 10065 }; 10066 10067 // We don't have to do anything to close the region if the if clause evaluates 10068 // to false. 
10069 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10070 10071 if (IfCond) { 10072 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10073 } else { 10074 RegionCodeGenTy RCG(BeginThenGen); 10075 RCG(CGF); 10076 } 10077 10078 // If we don't require privatization of device pointers, we emit the body in 10079 // between the runtime calls. This avoids duplicating the body code. 10080 if (Info.CaptureDeviceAddrMap.empty()) { 10081 CodeGen.setAction(NoPrivAction); 10082 CodeGen(CGF); 10083 } 10084 10085 if (IfCond) { 10086 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10087 } else { 10088 RegionCodeGenTy RCG(EndThenGen); 10089 RCG(CGF); 10090 } 10091 } 10092 10093 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10094 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10095 const Expr *Device) { 10096 if (!CGF.HaveInsertPoint()) 10097 return; 10098 10099 assert((isa<OMPTargetEnterDataDirective>(D) || 10100 isa<OMPTargetExitDataDirective>(D) || 10101 isa<OMPTargetUpdateDirective>(D)) && 10102 "Expecting either target enter, exit data, or update directives."); 10103 10104 CodeGenFunction::OMPTargetDataInfo InputInfo; 10105 llvm::Value *MapTypesArray = nullptr; 10106 // Generate the code for the opening of the data environment. 10107 auto &&ThenGen = [this, &D, Device, &InputInfo, 10108 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10109 // Emit device ID if any. 10110 llvm::Value *DeviceID = nullptr; 10111 if (Device) { 10112 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10113 CGF.Int64Ty, /*isSigned=*/true); 10114 } else { 10115 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10116 } 10117 10118 // Emit the number of elements in the offloading arrays. 
10119 llvm::Constant *PointerNum = 10120 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10121 10122 llvm::Value *OffloadingArgs[] = {DeviceID, 10123 PointerNum, 10124 InputInfo.BasePointersArray.getPointer(), 10125 InputInfo.PointersArray.getPointer(), 10126 InputInfo.SizesArray.getPointer(), 10127 MapTypesArray}; 10128 10129 // Select the right runtime function call for each expected standalone 10130 // directive. 10131 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10132 OpenMPRTLFunction RTLFn; 10133 switch (D.getDirectiveKind()) { 10134 case OMPD_target_enter_data: 10135 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 10136 : OMPRTL__tgt_target_data_begin; 10137 break; 10138 case OMPD_target_exit_data: 10139 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10140 : OMPRTL__tgt_target_data_end; 10141 break; 10142 case OMPD_target_update: 10143 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10144 : OMPRTL__tgt_target_data_update; 10145 break; 10146 case OMPD_parallel: 10147 case OMPD_for: 10148 case OMPD_parallel_for: 10149 case OMPD_parallel_sections: 10150 case OMPD_for_simd: 10151 case OMPD_parallel_for_simd: 10152 case OMPD_cancel: 10153 case OMPD_cancellation_point: 10154 case OMPD_ordered: 10155 case OMPD_threadprivate: 10156 case OMPD_allocate: 10157 case OMPD_task: 10158 case OMPD_simd: 10159 case OMPD_sections: 10160 case OMPD_section: 10161 case OMPD_single: 10162 case OMPD_master: 10163 case OMPD_critical: 10164 case OMPD_taskyield: 10165 case OMPD_barrier: 10166 case OMPD_taskwait: 10167 case OMPD_taskgroup: 10168 case OMPD_atomic: 10169 case OMPD_flush: 10170 case OMPD_teams: 10171 case OMPD_target_data: 10172 case OMPD_distribute: 10173 case OMPD_distribute_simd: 10174 case OMPD_distribute_parallel_for: 10175 case OMPD_distribute_parallel_for_simd: 10176 case OMPD_teams_distribute: 10177 case OMPD_teams_distribute_simd: 10178 case OMPD_teams_distribute_parallel_for: 10179 case 
OMPD_teams_distribute_parallel_for_simd: 10180 case OMPD_declare_simd: 10181 case OMPD_declare_target: 10182 case OMPD_end_declare_target: 10183 case OMPD_declare_reduction: 10184 case OMPD_declare_mapper: 10185 case OMPD_taskloop: 10186 case OMPD_taskloop_simd: 10187 case OMPD_target: 10188 case OMPD_target_simd: 10189 case OMPD_target_teams_distribute: 10190 case OMPD_target_teams_distribute_simd: 10191 case OMPD_target_teams_distribute_parallel_for: 10192 case OMPD_target_teams_distribute_parallel_for_simd: 10193 case OMPD_target_teams: 10194 case OMPD_target_parallel: 10195 case OMPD_target_parallel_for: 10196 case OMPD_target_parallel_for_simd: 10197 case OMPD_requires: 10198 case OMPD_unknown: 10199 llvm_unreachable("Unexpected standalone target data directive."); 10200 break; 10201 } 10202 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10203 }; 10204 10205 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10206 CodeGenFunction &CGF, PrePostActionTy &) { 10207 // Fill up the arrays with all the mapped variables. 10208 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10209 MappableExprsHandler::MapValuesArrayTy Pointers; 10210 MappableExprsHandler::MapValuesArrayTy Sizes; 10211 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10212 10213 // Get map clause information. 10214 MappableExprsHandler MEHandler(D, CGF); 10215 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10216 10217 TargetDataInfo Info; 10218 // Fill up the arrays and create the arguments. 
10219 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10220 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10221 Info.PointersArray, Info.SizesArray, 10222 Info.MapTypesArray, Info); 10223 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10224 InputInfo.BasePointersArray = 10225 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10226 InputInfo.PointersArray = 10227 Address(Info.PointersArray, CGM.getPointerAlign()); 10228 InputInfo.SizesArray = 10229 Address(Info.SizesArray, CGM.getPointerAlign()); 10230 MapTypesArray = Info.MapTypesArray; 10231 if (D.hasClausesOfKind<OMPDependClause>()) 10232 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10233 else 10234 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10235 }; 10236 10237 if (IfCond) { 10238 emitOMPIfClause(CGF, IfCond, TargetThenGen, 10239 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10240 } else { 10241 RegionCodeGenTy ThenRCG(TargetThenGen); 10242 ThenRCG(CGF); 10243 } 10244 } 10245 10246 namespace { 10247 /// Kind of parameter in a function with 'declare simd' directive. 10248 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10249 /// Attribute set of the parameter. 10250 struct ParamAttrTy { 10251 ParamKindTy Kind = Vector; 10252 llvm::APSInt StrideOrArg; 10253 llvm::APSInt Alignment; 10254 }; 10255 } // namespace 10256 10257 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10258 ArrayRef<ParamAttrTy> ParamAttrs) { 10259 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10260 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10261 // of that clause. The VLEN value must be power of 2. 10262 // In other case the notion of the function`s "characteristic data type" (CDT) 10263 // is used to compute the vector length. 10264 // CDT is defined in the following order: 10265 // a) For non-void function, the CDT is the return type. 
10266 // b) If the function has any non-uniform, non-linear parameters, then the 10267 // CDT is the type of the first such parameter. 10268 // c) If the CDT determined by a) or b) above is struct, union, or class 10269 // type which is pass-by-value (except for the type that maps to the 10270 // built-in complex data type), the characteristic data type is int. 10271 // d) If none of the above three cases is applicable, the CDT is int. 10272 // The VLEN is then determined based on the CDT and the size of vector 10273 // register of that ISA for which current vector version is generated. The 10274 // VLEN is computed using the formula below: 10275 // VLEN = sizeof(vector_register) / sizeof(CDT), 10276 // where vector register size specified in section 3.2.1 Registers and the 10277 // Stack Frame of original AMD64 ABI document. 10278 QualType RetType = FD->getReturnType(); 10279 if (RetType.isNull()) 10280 return 0; 10281 ASTContext &C = FD->getASTContext(); 10282 QualType CDT; 10283 if (!RetType.isNull() && !RetType->isVoidType()) { 10284 CDT = RetType; 10285 } else { 10286 unsigned Offset = 0; 10287 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10288 if (ParamAttrs[Offset].Kind == Vector) 10289 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10290 ++Offset; 10291 } 10292 if (CDT.isNull()) { 10293 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10294 if (ParamAttrs[I + Offset].Kind == Vector) { 10295 CDT = FD->getParamDecl(I)->getType(); 10296 break; 10297 } 10298 } 10299 } 10300 } 10301 if (CDT.isNull()) 10302 CDT = C.IntTy; 10303 CDT = CDT->getCanonicalTypeUnqualified(); 10304 if (CDT->isRecordType() || CDT->isUnionType()) 10305 CDT = C.IntTy; 10306 return C.getTypeSize(CDT); 10307 } 10308 10309 static void 10310 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10311 const llvm::APSInt &VLENVal, 10312 ArrayRef<ParamAttrTy> ParamAttrs, 10313 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10314 struct ISADataTy { 
10315 char ISA; 10316 unsigned VecRegSize; 10317 }; 10318 ISADataTy ISAData[] = { 10319 { 10320 'b', 128 10321 }, // SSE 10322 { 10323 'c', 256 10324 }, // AVX 10325 { 10326 'd', 256 10327 }, // AVX2 10328 { 10329 'e', 512 10330 }, // AVX512 10331 }; 10332 llvm::SmallVector<char, 2> Masked; 10333 switch (State) { 10334 case OMPDeclareSimdDeclAttr::BS_Undefined: 10335 Masked.push_back('N'); 10336 Masked.push_back('M'); 10337 break; 10338 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10339 Masked.push_back('N'); 10340 break; 10341 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10342 Masked.push_back('M'); 10343 break; 10344 } 10345 for (char Mask : Masked) { 10346 for (const ISADataTy &Data : ISAData) { 10347 SmallString<256> Buffer; 10348 llvm::raw_svector_ostream Out(Buffer); 10349 Out << "_ZGV" << Data.ISA << Mask; 10350 if (!VLENVal) { 10351 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10352 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10353 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10354 } else { 10355 Out << VLENVal; 10356 } 10357 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10358 switch (ParamAttr.Kind){ 10359 case LinearWithVarStride: 10360 Out << 's' << ParamAttr.StrideOrArg; 10361 break; 10362 case Linear: 10363 Out << 'l'; 10364 if (!!ParamAttr.StrideOrArg) 10365 Out << ParamAttr.StrideOrArg; 10366 break; 10367 case Uniform: 10368 Out << 'u'; 10369 break; 10370 case Vector: 10371 Out << 'v'; 10372 break; 10373 } 10374 if (!!ParamAttr.Alignment) 10375 Out << 'a' << ParamAttr.Alignment; 10376 } 10377 Out << '_' << Fn->getName(); 10378 Fn->addFnAttr(Out.str()); 10379 } 10380 } 10381 } 10382 10383 // This are the Functions that are needed to mangle the name of the 10384 // vector functions generated by the compiler, according to the rules 10385 // defined in the "Vector Function ABI specifications for AArch64", 10386 // available at 10387 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10388 10389 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10390 /// 10391 /// TODO: Need to implement the behavior for reference marked with a 10392 /// var or no linear modifiers (1.b in the section). For this, we 10393 /// need to extend ParamKindTy to support the linear modifiers. 10394 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10395 QT = QT.getCanonicalType(); 10396 10397 if (QT->isVoidType()) 10398 return false; 10399 10400 if (Kind == ParamKindTy::Uniform) 10401 return false; 10402 10403 if (Kind == ParamKindTy::Linear) 10404 return false; 10405 10406 // TODO: Handle linear references with modifiers 10407 10408 if (Kind == ParamKindTy::LinearWithVarStride) 10409 return false; 10410 10411 return true; 10412 } 10413 10414 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10415 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10416 QT = QT.getCanonicalType(); 10417 unsigned Size = C.getTypeSize(QT); 10418 10419 // Only scalars and complex within 16 bytes wide set PVB to true. 10420 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10421 return false; 10422 10423 if (QT->isFloatingType()) 10424 return true; 10425 10426 if (QT->isIntegerType()) 10427 return true; 10428 10429 if (QT->isPointerType()) 10430 return true; 10431 10432 // TODO: Add support for complex types (section 3.1.2, item 2). 10433 10434 return false; 10435 } 10436 10437 /// Computes the lane size (LS) of a return type or of an input parameter, 10438 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10439 /// TODO: Add support for references, section 3.2.1, item 1. 
10440 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10441 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10442 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10443 if (getAArch64PBV(PTy, C)) 10444 return C.getTypeSize(PTy); 10445 } 10446 if (getAArch64PBV(QT, C)) 10447 return C.getTypeSize(QT); 10448 10449 return C.getTypeSize(C.getUIntPtrType()); 10450 } 10451 10452 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10453 // signature of the scalar function, as defined in 3.2.2 of the 10454 // AAVFABI. 10455 static std::tuple<unsigned, unsigned, bool> 10456 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10457 QualType RetType = FD->getReturnType().getCanonicalType(); 10458 10459 ASTContext &C = FD->getASTContext(); 10460 10461 bool OutputBecomesInput = false; 10462 10463 llvm::SmallVector<unsigned, 8> Sizes; 10464 if (!RetType->isVoidType()) { 10465 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10466 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10467 OutputBecomesInput = true; 10468 } 10469 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10470 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10471 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10472 } 10473 10474 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10475 // The LS of a function parameter / return value can only be a power 10476 // of 2, starting from 8 bits, up to 128. 
10477 assert(std::all_of(Sizes.begin(), Sizes.end(), 10478 [](unsigned Size) { 10479 return Size == 8 || Size == 16 || Size == 32 || 10480 Size == 64 || Size == 128; 10481 }) && 10482 "Invalid size"); 10483 10484 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10485 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10486 OutputBecomesInput); 10487 } 10488 10489 /// Mangle the parameter part of the vector function name according to 10490 /// their OpenMP classification. The mangling function is defined in 10491 /// section 3.5 of the AAVFABI. 10492 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10493 SmallString<256> Buffer; 10494 llvm::raw_svector_ostream Out(Buffer); 10495 for (const auto &ParamAttr : ParamAttrs) { 10496 switch (ParamAttr.Kind) { 10497 case LinearWithVarStride: 10498 Out << "ls" << ParamAttr.StrideOrArg; 10499 break; 10500 case Linear: 10501 Out << 'l'; 10502 // Don't print the step value if it is not present or if it is 10503 // equal to 1. 10504 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10505 Out << ParamAttr.StrideOrArg; 10506 break; 10507 case Uniform: 10508 Out << 'u'; 10509 break; 10510 case Vector: 10511 Out << 'v'; 10512 break; 10513 } 10514 10515 if (!!ParamAttr.Alignment) 10516 Out << 'a' << ParamAttr.Alignment; 10517 } 10518 10519 return Out.str(); 10520 } 10521 10522 // Function used to add the attribute. The parameter `VLEN` is 10523 // templated to allow the use of "x" when targeting scalable functions 10524 // for SVE. 
10525 template <typename T> 10526 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10527 char ISA, StringRef ParSeq, 10528 StringRef MangledName, bool OutputBecomesInput, 10529 llvm::Function *Fn) { 10530 SmallString<256> Buffer; 10531 llvm::raw_svector_ostream Out(Buffer); 10532 Out << Prefix << ISA << LMask << VLEN; 10533 if (OutputBecomesInput) 10534 Out << "v"; 10535 Out << ParSeq << "_" << MangledName; 10536 Fn->addFnAttr(Out.str()); 10537 } 10538 10539 // Helper function to generate the Advanced SIMD names depending on 10540 // the value of the NDS when simdlen is not present. 10541 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10542 StringRef Prefix, char ISA, 10543 StringRef ParSeq, StringRef MangledName, 10544 bool OutputBecomesInput, 10545 llvm::Function *Fn) { 10546 switch (NDS) { 10547 case 8: 10548 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10549 OutputBecomesInput, Fn); 10550 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10551 OutputBecomesInput, Fn); 10552 break; 10553 case 16: 10554 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10555 OutputBecomesInput, Fn); 10556 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10557 OutputBecomesInput, Fn); 10558 break; 10559 case 32: 10560 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10561 OutputBecomesInput, Fn); 10562 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10563 OutputBecomesInput, Fn); 10564 break; 10565 case 64: 10566 case 128: 10567 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10568 OutputBecomesInput, Fn); 10569 break; 10570 default: 10571 llvm_unreachable("Scalar type is too wide."); 10572 } 10573 } 10574 10575 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10576 static void emitAArch64DeclareSimdFunction( 10577 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10578 ArrayRef<ParamAttrTy> ParamAttrs, 10579 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10580 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10581 10582 // Get basic data for building the vector signature. 10583 const auto Data = getNDSWDS(FD, ParamAttrs); 10584 const unsigned NDS = std::get<0>(Data); 10585 const unsigned WDS = std::get<1>(Data); 10586 const bool OutputBecomesInput = std::get<2>(Data); 10587 10588 // Check the values provided via `simdlen` by the user. 10589 // 1. A `simdlen(1)` doesn't produce vector signatures, 10590 if (UserVLEN == 1) { 10591 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10592 DiagnosticsEngine::Warning, 10593 "The clause simdlen(1) has no effect when targeting aarch64."); 10594 CGM.getDiags().Report(SLoc, DiagID); 10595 return; 10596 } 10597 10598 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10599 // Advanced SIMD output. 10600 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10601 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10602 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10603 "power of 2 when targeting Advanced SIMD."); 10604 CGM.getDiags().Report(SLoc, DiagID); 10605 return; 10606 } 10607 10608 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10609 // limits. 10610 if (ISA == 's' && UserVLEN != 0) { 10611 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10612 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10613 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10614 "lanes in the architectural constraints " 10615 "for SVE (min is 128-bit, max is " 10616 "2048-bit, by steps of 128-bit)"); 10617 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10618 return; 10619 } 10620 } 10621 10622 // Sort out parameter sequence. 
10623 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10624 StringRef Prefix = "_ZGV"; 10625 // Generate simdlen from user input (if any). 10626 if (UserVLEN) { 10627 if (ISA == 's') { 10628 // SVE generates only a masked function. 10629 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10630 OutputBecomesInput, Fn); 10631 } else { 10632 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10633 // Advanced SIMD generates one or two functions, depending on 10634 // the `[not]inbranch` clause. 10635 switch (State) { 10636 case OMPDeclareSimdDeclAttr::BS_Undefined: 10637 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10638 OutputBecomesInput, Fn); 10639 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10640 OutputBecomesInput, Fn); 10641 break; 10642 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10643 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10644 OutputBecomesInput, Fn); 10645 break; 10646 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10647 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10648 OutputBecomesInput, Fn); 10649 break; 10650 } 10651 } 10652 } else { 10653 // If no user simdlen is provided, follow the AAVFABI rules for 10654 // generating the vector length. 10655 if (ISA == 's') { 10656 // SVE, section 3.4.1, item 1. 10657 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10658 OutputBecomesInput, Fn); 10659 } else { 10660 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10661 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10662 // two vector names depending on the use of the clause 10663 // `[not]inbranch`. 
10664 switch (State) { 10665 case OMPDeclareSimdDeclAttr::BS_Undefined: 10666 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10667 OutputBecomesInput, Fn); 10668 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10669 OutputBecomesInput, Fn); 10670 break; 10671 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10672 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10673 OutputBecomesInput, Fn); 10674 break; 10675 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10676 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10677 OutputBecomesInput, Fn); 10678 break; 10679 } 10680 } 10681 } 10682 } 10683 10684 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10685 llvm::Function *Fn) { 10686 ASTContext &C = CGM.getContext(); 10687 FD = FD->getMostRecentDecl(); 10688 // Map params to their positions in function decl. 10689 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10690 if (isa<CXXMethodDecl>(FD)) 10691 ParamPositions.try_emplace(FD, 0); 10692 unsigned ParamPos = ParamPositions.size(); 10693 for (const ParmVarDecl *P : FD->parameters()) { 10694 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10695 ++ParamPos; 10696 } 10697 while (FD) { 10698 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10699 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10700 // Mark uniform parameters. 10701 for (const Expr *E : Attr->uniforms()) { 10702 E = E->IgnoreParenImpCasts(); 10703 unsigned Pos; 10704 if (isa<CXXThisExpr>(E)) { 10705 Pos = ParamPositions[FD]; 10706 } else { 10707 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10708 ->getCanonicalDecl(); 10709 Pos = ParamPositions[PVD]; 10710 } 10711 ParamAttrs[Pos].Kind = Uniform; 10712 } 10713 // Get alignment info. 
10714 auto NI = Attr->alignments_begin(); 10715 for (const Expr *E : Attr->aligneds()) { 10716 E = E->IgnoreParenImpCasts(); 10717 unsigned Pos; 10718 QualType ParmTy; 10719 if (isa<CXXThisExpr>(E)) { 10720 Pos = ParamPositions[FD]; 10721 ParmTy = E->getType(); 10722 } else { 10723 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10724 ->getCanonicalDecl(); 10725 Pos = ParamPositions[PVD]; 10726 ParmTy = PVD->getType(); 10727 } 10728 ParamAttrs[Pos].Alignment = 10729 (*NI) 10730 ? (*NI)->EvaluateKnownConstInt(C) 10731 : llvm::APSInt::getUnsigned( 10732 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10733 .getQuantity()); 10734 ++NI; 10735 } 10736 // Mark linear parameters. 10737 auto SI = Attr->steps_begin(); 10738 auto MI = Attr->modifiers_begin(); 10739 for (const Expr *E : Attr->linears()) { 10740 E = E->IgnoreParenImpCasts(); 10741 unsigned Pos; 10742 if (isa<CXXThisExpr>(E)) { 10743 Pos = ParamPositions[FD]; 10744 } else { 10745 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10746 ->getCanonicalDecl(); 10747 Pos = ParamPositions[PVD]; 10748 } 10749 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10750 ParamAttr.Kind = Linear; 10751 if (*SI) { 10752 Expr::EvalResult Result; 10753 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10754 if (const auto *DRE = 10755 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10756 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10757 ParamAttr.Kind = LinearWithVarStride; 10758 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10759 ParamPositions[StridePVD->getCanonicalDecl()]); 10760 } 10761 } 10762 } else { 10763 ParamAttr.StrideOrArg = Result.Val.getInt(); 10764 } 10765 } 10766 ++SI; 10767 ++MI; 10768 } 10769 llvm::APSInt VLENVal; 10770 SourceLocation ExprLoc; 10771 const Expr *VLENExpr = Attr->getSimdlen(); 10772 if (VLENExpr) { 10773 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10774 ExprLoc = VLENExpr->getExprLoc(); 10775 } 10776 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10777 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10778 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10779 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10780 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10781 unsigned VLEN = VLENVal.getExtValue(); 10782 StringRef MangledName = Fn->getName(); 10783 if (CGM.getTarget().hasFeature("sve")) 10784 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10785 MangledName, 's', 128, Fn, ExprLoc); 10786 if (CGM.getTarget().hasFeature("neon")) 10787 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10788 MangledName, 'n', 128, Fn, ExprLoc); 10789 } 10790 } 10791 FD = FD->getPreviousDecl(); 10792 } 10793 } 10794 10795 namespace { 10796 /// Cleanup action for doacross support. 10797 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10798 public: 10799 static const int DoacrossFinArgs = 2; 10800 10801 private: 10802 llvm::FunctionCallee RTLFn; 10803 llvm::Value *Args[DoacrossFinArgs]; 10804 10805 public: 10806 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10807 ArrayRef<llvm::Value *> CallArgs) 10808 : RTLFn(RTLFn) { 10809 assert(CallArgs.size() == DoacrossFinArgs); 10810 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10811 } 10812 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10813 if (!CGF.HaveInsertPoint()) 10814 return; 10815 CGF.EmitRuntimeCall(RTLFn, Args); 10816 } 10817 }; 10818 } // namespace 10819 10820 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10821 const OMPLoopDirective &D, 10822 ArrayRef<Expr *> NumIterations) { 10823 if (!CGF.HaveInsertPoint()) 10824 return; 10825 10826 ASTContext &C = CGM.getContext(); 10827 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10828 RecordDecl *RD; 10829 if (KmpDimTy.isNull()) { 10830 // Build struct kmp_dim { // loop bounds info casted to 
kmp_int64 10831 // kmp_int64 lo; // lower 10832 // kmp_int64 up; // upper 10833 // kmp_int64 st; // stride 10834 // }; 10835 RD = C.buildImplicitRecord("kmp_dim"); 10836 RD->startDefinition(); 10837 addFieldToRecordDecl(C, RD, Int64Ty); 10838 addFieldToRecordDecl(C, RD, Int64Ty); 10839 addFieldToRecordDecl(C, RD, Int64Ty); 10840 RD->completeDefinition(); 10841 KmpDimTy = C.getRecordType(RD); 10842 } else { 10843 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10844 } 10845 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10846 QualType ArrayTy = 10847 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); 10848 10849 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10850 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10851 enum { LowerFD = 0, UpperFD, StrideFD }; 10852 // Fill dims with data. 10853 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10854 LValue DimsLVal = CGF.MakeAddrLValue( 10855 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10856 // dims.upper = num_iterations; 10857 LValue UpperLVal = CGF.EmitLValueForField( 10858 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10859 llvm::Value *NumIterVal = 10860 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10861 D.getNumIterations()->getType(), Int64Ty, 10862 D.getNumIterations()->getExprLoc()); 10863 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10864 // dims.stride = 1; 10865 LValue StrideLVal = CGF.EmitLValueForField( 10866 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10867 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10868 StrideLVal); 10869 } 10870 10871 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10872 // kmp_int32 num_dims, struct kmp_dim * dims); 10873 llvm::Value *Args[] = { 10874 emitUpdateLocation(CGF, D.getBeginLoc()), 10875 getThreadID(CGF, D.getBeginLoc()), 10876 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10877 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10878 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10879 CGM.VoidPtrTy)}; 10880 10881 llvm::FunctionCallee RTLFn = 10882 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10883 CGF.EmitRuntimeCall(RTLFn, Args); 10884 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10885 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10886 llvm::FunctionCallee FiniRTLFn = 10887 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10888 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10889 llvm::makeArrayRef(FiniArgs)); 10890 } 10891 10892 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10893 const OMPDependClause *C) { 10894 QualType Int64Ty = 10895 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10896 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10897 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10898 Int64Ty, Size, ArrayType::Normal, 0); 10899 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10900 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10901 const Expr *CounterVal = C->getLoopData(I); 10902 assert(CounterVal); 10903 llvm::Value *CntVal = CGF.EmitScalarConversion( 10904 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10905 CounterVal->getExprLoc()); 10906 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10907 /*Volatile=*/false, Int64Ty); 10908 } 10909 llvm::Value *Args[] = { 10910 emitUpdateLocation(CGF, C->getBeginLoc()), 10911 getThreadID(CGF, C->getBeginLoc()), 10912 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10913 llvm::FunctionCallee RTLFn; 10914 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10915 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10916 } else { 10917 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10918 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10919 } 
10920 CGF.EmitRuntimeCall(RTLFn, Args); 10921 } 10922 10923 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10924 llvm::FunctionCallee Callee, 10925 ArrayRef<llvm::Value *> Args) const { 10926 assert(Loc.isValid() && "Outlined function call location must be valid."); 10927 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10928 10929 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10930 if (Fn->doesNotThrow()) { 10931 CGF.EmitNounwindRuntimeCall(Fn, Args); 10932 return; 10933 } 10934 } 10935 CGF.EmitRuntimeCall(Callee, Args); 10936 } 10937 10938 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10939 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10940 ArrayRef<llvm::Value *> Args) const { 10941 emitCall(CGF, Loc, OutlinedFn, Args); 10942 } 10943 10944 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 10945 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 10946 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 10947 HasEmittedDeclareTargetRegion = true; 10948 } 10949 10950 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10951 const VarDecl *NativeParam, 10952 const VarDecl *TargetParam) const { 10953 return CGF.GetAddrOfLocalVar(NativeParam); 10954 } 10955 10956 namespace { 10957 /// Cleanup action for allocate support. 
10958 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10959 public: 10960 static const int CleanupArgs = 3; 10961 10962 private: 10963 llvm::FunctionCallee RTLFn; 10964 llvm::Value *Args[CleanupArgs]; 10965 10966 public: 10967 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 10968 ArrayRef<llvm::Value *> CallArgs) 10969 : RTLFn(RTLFn) { 10970 assert(CallArgs.size() == CleanupArgs && 10971 "Size of arguments does not match."); 10972 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10973 } 10974 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10975 if (!CGF.HaveInsertPoint()) 10976 return; 10977 CGF.EmitRuntimeCall(RTLFn, Args); 10978 } 10979 }; 10980 } // namespace 10981 10982 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 10983 const VarDecl *VD) { 10984 if (!VD) 10985 return Address::invalid(); 10986 const VarDecl *CVD = VD->getCanonicalDecl(); 10987 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 10988 return Address::invalid(); 10989 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 10990 // Use the default allocation. 
10991 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 10992 !AA->getAllocator()) 10993 return Address::invalid(); 10994 llvm::Value *Size; 10995 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 10996 if (CVD->getType()->isVariablyModifiedType()) { 10997 Size = CGF.getTypeSize(CVD->getType()); 10998 // Align the size: ((size + align - 1) / align) * align 10999 Size = CGF.Builder.CreateNUWAdd( 11000 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11001 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11002 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11003 } else { 11004 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11005 Size = CGM.getSize(Sz.alignTo(Align)); 11006 } 11007 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11008 assert(AA->getAllocator() && 11009 "Expected allocator expression for non-default allocator."); 11010 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11011 // According to the standard, the original allocator type is a enum (integer). 11012 // Convert to pointer type, if required. 
11013 if (Allocator->getType()->isIntegerTy()) 11014 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11015 else if (Allocator->getType()->isPointerTy()) 11016 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11017 CGM.VoidPtrTy); 11018 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11019 11020 llvm::Value *Addr = 11021 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11022 CVD->getName() + ".void.addr"); 11023 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11024 Allocator}; 11025 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11026 11027 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11028 llvm::makeArrayRef(FiniArgs)); 11029 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11030 Addr, 11031 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11032 CVD->getName() + ".addr"); 11033 return Address(Addr, Align); 11034 } 11035 11036 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11037 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11038 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11039 llvm_unreachable("Not supported in SIMD-only mode"); 11040 } 11041 11042 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11043 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11044 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11045 llvm_unreachable("Not supported in SIMD-only mode"); 11046 } 11047 11048 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11049 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11050 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11051 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11052 bool Tied, unsigned &NumberOfParts) { 11053 llvm_unreachable("Not supported in SIMD-only mode"); 11054 } 11055 11056 void 
CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11057 SourceLocation Loc, 11058 llvm::Function *OutlinedFn, 11059 ArrayRef<llvm::Value *> CapturedVars, 11060 const Expr *IfCond) { 11061 llvm_unreachable("Not supported in SIMD-only mode"); 11062 } 11063 11064 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11065 CodeGenFunction &CGF, StringRef CriticalName, 11066 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11067 const Expr *Hint) { 11068 llvm_unreachable("Not supported in SIMD-only mode"); 11069 } 11070 11071 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11072 const RegionCodeGenTy &MasterOpGen, 11073 SourceLocation Loc) { 11074 llvm_unreachable("Not supported in SIMD-only mode"); 11075 } 11076 11077 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11078 SourceLocation Loc) { 11079 llvm_unreachable("Not supported in SIMD-only mode"); 11080 } 11081 11082 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11083 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11084 SourceLocation Loc) { 11085 llvm_unreachable("Not supported in SIMD-only mode"); 11086 } 11087 11088 void CGOpenMPSIMDRuntime::emitSingleRegion( 11089 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11090 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11091 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11092 ArrayRef<const Expr *> AssignmentOps) { 11093 llvm_unreachable("Not supported in SIMD-only mode"); 11094 } 11095 11096 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11097 const RegionCodeGenTy &OrderedOpGen, 11098 SourceLocation Loc, 11099 bool IsThreads) { 11100 llvm_unreachable("Not supported in SIMD-only mode"); 11101 } 11102 11103 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11104 SourceLocation Loc, 11105 OpenMPDirectiveKind Kind, 11106 bool EmitChecks, 11107 bool ForceSimpleCall) { 11108 llvm_unreachable("Not supported in SIMD-only mode"); 11109 } 
11110 11111 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11112 CodeGenFunction &CGF, SourceLocation Loc, 11113 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11114 bool Ordered, const DispatchRTInput &DispatchValues) { 11115 llvm_unreachable("Not supported in SIMD-only mode"); 11116 } 11117 11118 void CGOpenMPSIMDRuntime::emitForStaticInit( 11119 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11120 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11121 llvm_unreachable("Not supported in SIMD-only mode"); 11122 } 11123 11124 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11125 CodeGenFunction &CGF, SourceLocation Loc, 11126 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 11127 llvm_unreachable("Not supported in SIMD-only mode"); 11128 } 11129 11130 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11131 SourceLocation Loc, 11132 unsigned IVSize, 11133 bool IVSigned) { 11134 llvm_unreachable("Not supported in SIMD-only mode"); 11135 } 11136 11137 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11138 SourceLocation Loc, 11139 OpenMPDirectiveKind DKind) { 11140 llvm_unreachable("Not supported in SIMD-only mode"); 11141 } 11142 11143 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11144 SourceLocation Loc, 11145 unsigned IVSize, bool IVSigned, 11146 Address IL, Address LB, 11147 Address UB, Address ST) { 11148 llvm_unreachable("Not supported in SIMD-only mode"); 11149 } 11150 11151 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11152 llvm::Value *NumThreads, 11153 SourceLocation Loc) { 11154 llvm_unreachable("Not supported in SIMD-only mode"); 11155 } 11156 11157 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11158 OpenMPProcBindClauseKind ProcBind, 11159 SourceLocation Loc) { 11160 llvm_unreachable("Not supported in SIMD-only mode"); 11161 } 11162 11163 Address 
CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11164 const VarDecl *VD, 11165 Address VDAddr, 11166 SourceLocation Loc) { 11167 llvm_unreachable("Not supported in SIMD-only mode"); 11168 } 11169 11170 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11171 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11172 CodeGenFunction *CGF) { 11173 llvm_unreachable("Not supported in SIMD-only mode"); 11174 } 11175 11176 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11177 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11178 llvm_unreachable("Not supported in SIMD-only mode"); 11179 } 11180 11181 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11182 ArrayRef<const Expr *> Vars, 11183 SourceLocation Loc) { 11184 llvm_unreachable("Not supported in SIMD-only mode"); 11185 } 11186 11187 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11188 const OMPExecutableDirective &D, 11189 llvm::Function *TaskFunction, 11190 QualType SharedsTy, Address Shareds, 11191 const Expr *IfCond, 11192 const OMPTaskDataTy &Data) { 11193 llvm_unreachable("Not supported in SIMD-only mode"); 11194 } 11195 11196 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11197 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11198 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11199 const Expr *IfCond, const OMPTaskDataTy &Data) { 11200 llvm_unreachable("Not supported in SIMD-only mode"); 11201 } 11202 11203 void CGOpenMPSIMDRuntime::emitReduction( 11204 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11205 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11206 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11207 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11208 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11209 ReductionOps, 
Options); 11210 } 11211 11212 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11213 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11214 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11215 llvm_unreachable("Not supported in SIMD-only mode"); 11216 } 11217 11218 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11219 SourceLocation Loc, 11220 ReductionCodeGen &RCG, 11221 unsigned N) { 11222 llvm_unreachable("Not supported in SIMD-only mode"); 11223 } 11224 11225 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11226 SourceLocation Loc, 11227 llvm::Value *ReductionsPtr, 11228 LValue SharedLVal) { 11229 llvm_unreachable("Not supported in SIMD-only mode"); 11230 } 11231 11232 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11233 SourceLocation Loc) { 11234 llvm_unreachable("Not supported in SIMD-only mode"); 11235 } 11236 11237 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11238 CodeGenFunction &CGF, SourceLocation Loc, 11239 OpenMPDirectiveKind CancelRegion) { 11240 llvm_unreachable("Not supported in SIMD-only mode"); 11241 } 11242 11243 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11244 SourceLocation Loc, const Expr *IfCond, 11245 OpenMPDirectiveKind CancelRegion) { 11246 llvm_unreachable("Not supported in SIMD-only mode"); 11247 } 11248 11249 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11250 const OMPExecutableDirective &D, StringRef ParentName, 11251 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11252 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11253 llvm_unreachable("Not supported in SIMD-only mode"); 11254 } 11255 11256 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, 11257 const OMPExecutableDirective &D, 11258 llvm::Function *OutlinedFn, 11259 llvm::Value *OutlinedFnID, 11260 const Expr *IfCond, 11261 const Expr *Device) { 11262 llvm_unreachable("Not supported in SIMD-only 
mode"); 11263 } 11264 11265 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 11266 llvm_unreachable("Not supported in SIMD-only mode"); 11267 } 11268 11269 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 11270 llvm_unreachable("Not supported in SIMD-only mode"); 11271 } 11272 11273 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 11274 return false; 11275 } 11276 11277 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { 11278 return nullptr; 11279 } 11280 11281 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 11282 const OMPExecutableDirective &D, 11283 SourceLocation Loc, 11284 llvm::Function *OutlinedFn, 11285 ArrayRef<llvm::Value *> CapturedVars) { 11286 llvm_unreachable("Not supported in SIMD-only mode"); 11287 } 11288 11289 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11290 const Expr *NumTeams, 11291 const Expr *ThreadLimit, 11292 SourceLocation Loc) { 11293 llvm_unreachable("Not supported in SIMD-only mode"); 11294 } 11295 11296 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 11297 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11298 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11299 llvm_unreachable("Not supported in SIMD-only mode"); 11300 } 11301 11302 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 11303 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11304 const Expr *Device) { 11305 llvm_unreachable("Not supported in SIMD-only mode"); 11306 } 11307 11308 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11309 const OMPLoopDirective &D, 11310 ArrayRef<Expr *> NumIterations) { 11311 llvm_unreachable("Not supported in SIMD-only mode"); 11312 } 11313 11314 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11315 const OMPDependClause *C) { 11316 llvm_unreachable("Not supported in SIMD-only mode"); 11317 } 11318 11319 const VarDecl * 11320 
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 11321 const VarDecl *NativeParam) const { 11322 llvm_unreachable("Not supported in SIMD-only mode"); 11323 } 11324 11325 Address 11326 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 11327 const VarDecl *NativeParam, 11328 const VarDecl *TargetParam) const { 11329 llvm_unreachable("Not supported in SIMD-only mode"); 11330 } 11331