1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGOpenMPRuntime.h" 14 #include "CGCXXABI.h" 15 #include "CGCleanup.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/AST/Attr.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/StmtOpenMP.h" 22 #include "clang/AST/StmtVisitor.h" 23 #include "clang/Basic/BitmaskEnum.h" 24 #include "clang/CodeGen/ConstantInitBuilder.h" 25 #include "llvm/ADT/ArrayRef.h" 26 #include "llvm/ADT/SetOperations.h" 27 #include "llvm/Bitcode/BitcodeReader.h" 28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 29 #include "llvm/IR/DerivedTypes.h" 30 #include "llvm/IR/GlobalValue.h" 31 #include "llvm/IR/Value.h" 32 #include "llvm/Support/Format.h" 33 #include "llvm/Support/raw_ostream.h" 34 #include <cassert> 35 36 using namespace clang; 37 using namespace CodeGen; 38 using namespace llvm::omp; 39 40 namespace { 41 /// Base class for handling code generation inside OpenMP regions. 42 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 43 public: 44 /// Kinds of OpenMP regions used in codegen. 45 enum CGOpenMPRegionKind { 46 /// Region with outlined function for standalone 'parallel' 47 /// directive. 48 ParallelOutlinedRegion, 49 /// Region with outlined function for standalone 'task' directive. 50 TaskOutlinedRegion, 51 /// Region for constructs that do not require function outlining, 52 /// like 'for', 'sections', 'atomic' etc. directives. 
53 InlinedRegion, 54 /// Region with outlined function for standalone 'target' directive. 55 TargetRegion, 56 }; 57 58 CGOpenMPRegionInfo(const CapturedStmt &CS, 59 const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 63 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 64 65 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 66 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 67 bool HasCancel) 68 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 69 Kind(Kind), HasCancel(HasCancel) {} 70 71 /// Get a variable or parameter for storing global thread id 72 /// inside OpenMP construct. 73 virtual const VarDecl *getThreadIDVariable() const = 0; 74 75 /// Emit the captured statement body. 76 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 77 78 /// Get an LValue for the current ThreadID variable. 79 /// \return LValue for thread id variable. This LValue always has type int32*. 80 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 81 82 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 83 84 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 85 86 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 87 88 bool hasCancel() const { return HasCancel; } 89 90 static bool classof(const CGCapturedStmtInfo *Info) { 91 return Info->getKind() == CR_OpenMP; 92 } 93 94 ~CGOpenMPRegionInfo() override = default; 95 96 protected: 97 CGOpenMPRegionKind RegionKind; 98 RegionCodeGenTy CodeGen; 99 OpenMPDirectiveKind Kind; 100 bool HasCancel; 101 }; 102 103 /// API for captured statement code generation in OpenMP constructs. 
104 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 105 public: 106 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 107 const RegionCodeGenTy &CodeGen, 108 OpenMPDirectiveKind Kind, bool HasCancel, 109 StringRef HelperName) 110 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 111 HasCancel), 112 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 113 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 114 } 115 116 /// Get a variable or parameter for storing global thread id 117 /// inside OpenMP construct. 118 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 119 120 /// Get the name of the capture helper. 121 StringRef getHelperName() const override { return HelperName; } 122 123 static bool classof(const CGCapturedStmtInfo *Info) { 124 return CGOpenMPRegionInfo::classof(Info) && 125 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 126 ParallelOutlinedRegion; 127 } 128 129 private: 130 /// A variable or parameter storing global thread id for OpenMP 131 /// constructs. 132 const VarDecl *ThreadIDVar; 133 StringRef HelperName; 134 }; 135 136 /// API for captured statement code generation in OpenMP constructs. 137 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 138 public: 139 class UntiedTaskActionTy final : public PrePostActionTy { 140 bool Untied; 141 const VarDecl *PartIDVar; 142 const RegionCodeGenTy UntiedCodeGen; 143 llvm::SwitchInst *UntiedSwitch = nullptr; 144 145 public: 146 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 147 const RegionCodeGenTy &UntiedCodeGen) 148 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 149 void Enter(CodeGenFunction &CGF) override { 150 if (Untied) { 151 // Emit task switching point. 
152 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 153 CGF.GetAddrOfLocalVar(PartIDVar), 154 PartIDVar->getType()->castAs<PointerType>()); 155 llvm::Value *Res = 156 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 157 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 158 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 159 CGF.EmitBlock(DoneBB); 160 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 161 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 162 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 163 CGF.Builder.GetInsertBlock()); 164 emitUntiedSwitch(CGF); 165 } 166 } 167 void emitUntiedSwitch(CodeGenFunction &CGF) const { 168 if (Untied) { 169 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 170 CGF.GetAddrOfLocalVar(PartIDVar), 171 PartIDVar->getType()->castAs<PointerType>()); 172 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 173 PartIdLVal); 174 UntiedCodeGen(CGF); 175 CodeGenFunction::JumpDest CurPoint = 176 CGF.getJumpDestInCurrentScope(".untied.next."); 177 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 178 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 179 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 180 CGF.Builder.GetInsertBlock()); 181 CGF.EmitBranchThroughCleanup(CurPoint); 182 CGF.EmitBlock(CurPoint.getBlock()); 183 } 184 } 185 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 186 }; 187 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 188 const VarDecl *ThreadIDVar, 189 const RegionCodeGenTy &CodeGen, 190 OpenMPDirectiveKind Kind, bool HasCancel, 191 const UntiedTaskActionTy &Action) 192 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 193 ThreadIDVar(ThreadIDVar), Action(Action) { 194 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 195 } 196 197 /// Get a variable or parameter for storing global thread id 198 /// inside OpenMP construct. 
199 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 200 201 /// Get an LValue for the current ThreadID variable. 202 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 203 204 /// Get the name of the capture helper. 205 StringRef getHelperName() const override { return ".omp_outlined."; } 206 207 void emitUntiedSwitch(CodeGenFunction &CGF) override { 208 Action.emitUntiedSwitch(CGF); 209 } 210 211 static bool classof(const CGCapturedStmtInfo *Info) { 212 return CGOpenMPRegionInfo::classof(Info) && 213 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 214 TaskOutlinedRegion; 215 } 216 217 private: 218 /// A variable or parameter storing global thread id for OpenMP 219 /// constructs. 220 const VarDecl *ThreadIDVar; 221 /// Action for emitting code for untied tasks. 222 const UntiedTaskActionTy &Action; 223 }; 224 225 /// API for inlined captured statement code generation in OpenMP 226 /// constructs. 227 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 228 public: 229 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 230 const RegionCodeGenTy &CodeGen, 231 OpenMPDirectiveKind Kind, bool HasCancel) 232 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 233 OldCSI(OldCSI), 234 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 235 236 // Retrieve the value of the context parameter. 237 llvm::Value *getContextValue() const override { 238 if (OuterRegionInfo) 239 return OuterRegionInfo->getContextValue(); 240 llvm_unreachable("No context value for inlined OpenMP region"); 241 } 242 243 void setContextValue(llvm::Value *V) override { 244 if (OuterRegionInfo) { 245 OuterRegionInfo->setContextValue(V); 246 return; 247 } 248 llvm_unreachable("No context value for inlined OpenMP region"); 249 } 250 251 /// Lookup the captured field decl for a variable. 
252 const FieldDecl *lookup(const VarDecl *VD) const override { 253 if (OuterRegionInfo) 254 return OuterRegionInfo->lookup(VD); 255 // If there is no outer outlined region,no need to lookup in a list of 256 // captured variables, we can use the original one. 257 return nullptr; 258 } 259 260 FieldDecl *getThisFieldDecl() const override { 261 if (OuterRegionInfo) 262 return OuterRegionInfo->getThisFieldDecl(); 263 return nullptr; 264 } 265 266 /// Get a variable or parameter for storing global thread id 267 /// inside OpenMP construct. 268 const VarDecl *getThreadIDVariable() const override { 269 if (OuterRegionInfo) 270 return OuterRegionInfo->getThreadIDVariable(); 271 return nullptr; 272 } 273 274 /// Get an LValue for the current ThreadID variable. 275 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 276 if (OuterRegionInfo) 277 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 278 llvm_unreachable("No LValue for inlined OpenMP construct"); 279 } 280 281 /// Get the name of the capture helper. 282 StringRef getHelperName() const override { 283 if (auto *OuterRegionInfo = getOldCSI()) 284 return OuterRegionInfo->getHelperName(); 285 llvm_unreachable("No helper name for inlined OpenMP construct"); 286 } 287 288 void emitUntiedSwitch(CodeGenFunction &CGF) override { 289 if (OuterRegionInfo) 290 OuterRegionInfo->emitUntiedSwitch(CGF); 291 } 292 293 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 294 295 static bool classof(const CGCapturedStmtInfo *Info) { 296 return CGOpenMPRegionInfo::classof(Info) && 297 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 298 } 299 300 ~CGOpenMPInlinedRegionInfo() override = default; 301 302 private: 303 /// CodeGen info about outer OpenMP region. 304 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 305 CGOpenMPRegionInfo *OuterRegionInfo; 306 }; 307 308 /// API for captured statement code generation in OpenMP target 309 /// constructs. 
For this captures, implicit parameters are used instead of the 310 /// captured fields. The name of the target region has to be unique in a given 311 /// application so it is provided by the client, because only the client has 312 /// the information to generate that. 313 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 314 public: 315 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 316 const RegionCodeGenTy &CodeGen, StringRef HelperName) 317 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 318 /*HasCancel=*/false), 319 HelperName(HelperName) {} 320 321 /// This is unused for target regions because each starts executing 322 /// with a single thread. 323 const VarDecl *getThreadIDVariable() const override { return nullptr; } 324 325 /// Get the name of the capture helper. 326 StringRef getHelperName() const override { return HelperName; } 327 328 static bool classof(const CGCapturedStmtInfo *Info) { 329 return CGOpenMPRegionInfo::classof(Info) && 330 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 331 } 332 333 private: 334 StringRef HelperName; 335 }; 336 337 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 338 llvm_unreachable("No codegen for expressions"); 339 } 340 /// API for generation of expressions captured in a innermost OpenMP 341 /// region. 342 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 343 public: 344 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 345 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 346 OMPD_unknown, 347 /*HasCancel=*/false), 348 PrivScope(CGF) { 349 // Make sure the globals captured in the provided statement are local by 350 // using the privatization logic. We assume the same variable is not 351 // captured more than once. 
352 for (const auto &C : CS.captures()) { 353 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 354 continue; 355 356 const VarDecl *VD = C.getCapturedVar(); 357 if (VD->isLocalVarDeclOrParm()) 358 continue; 359 360 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 361 /*RefersToEnclosingVariableOrCapture=*/false, 362 VD->getType().getNonReferenceType(), VK_LValue, 363 C.getLocation()); 364 PrivScope.addPrivate( 365 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); 366 } 367 (void)PrivScope.Privatize(); 368 } 369 370 /// Lookup the captured field decl for a variable. 371 const FieldDecl *lookup(const VarDecl *VD) const override { 372 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 373 return FD; 374 return nullptr; 375 } 376 377 /// Emit the captured statement body. 378 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 379 llvm_unreachable("No body for expressions"); 380 } 381 382 /// Get a variable or parameter for storing global thread id 383 /// inside OpenMP construct. 384 const VarDecl *getThreadIDVariable() const override { 385 llvm_unreachable("No thread id for expressions"); 386 } 387 388 /// Get the name of the capture helper. 389 StringRef getHelperName() const override { 390 llvm_unreachable("No helper name for expressions"); 391 } 392 393 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 394 395 private: 396 /// Private scope to capture global variables. 397 CodeGenFunction::OMPPrivateScope PrivScope; 398 }; 399 400 /// RAII for emitting code of OpenMP constructs. 401 class InlinedOpenMPRegionRAII { 402 CodeGenFunction &CGF; 403 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 404 FieldDecl *LambdaThisCaptureField = nullptr; 405 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 406 407 public: 408 /// Constructs region for combined constructs. 409 /// \param CodeGen Code generation sequence for combined directives. 
Includes 410 /// a list of functions used for code generation of implicitly inlined 411 /// regions. 412 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 413 OpenMPDirectiveKind Kind, bool HasCancel) 414 : CGF(CGF) { 415 // Start emission for the construct. 416 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 417 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 418 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 419 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 420 CGF.LambdaThisCaptureField = nullptr; 421 BlockInfo = CGF.BlockInfo; 422 CGF.BlockInfo = nullptr; 423 } 424 425 ~InlinedOpenMPRegionRAII() { 426 // Restore original CapturedStmtInfo only if we're done with code emission. 427 auto *OldCSI = 428 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 429 delete CGF.CapturedStmtInfo; 430 CGF.CapturedStmtInfo = OldCSI; 431 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 432 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 433 CGF.BlockInfo = BlockInfo; 434 } 435 }; 436 437 /// Values for bit flags used in the ident_t to describe the fields. 438 /// All enumeric elements are named and described in accordance with the code 439 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 440 enum OpenMPLocationFlags : unsigned { 441 /// Use trampoline for internal microtask. 442 OMP_IDENT_IMD = 0x01, 443 /// Use c-style ident structure. 444 OMP_IDENT_KMPC = 0x02, 445 /// Atomic reduction option for kmpc_reduce. 446 OMP_ATOMIC_REDUCE = 0x10, 447 /// Explicit 'barrier' directive. 448 OMP_IDENT_BARRIER_EXPL = 0x20, 449 /// Implicit barrier in code. 450 OMP_IDENT_BARRIER_IMPL = 0x40, 451 /// Implicit barrier in 'for' directive. 452 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 453 /// Implicit barrier in 'sections' directive. 454 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 455 /// Implicit barrier in 'single' directive. 
456 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 457 /// Call of __kmp_for_static_init for static loop. 458 OMP_IDENT_WORK_LOOP = 0x200, 459 /// Call of __kmp_for_static_init for sections. 460 OMP_IDENT_WORK_SECTIONS = 0x400, 461 /// Call of __kmp_for_static_init for distribute. 462 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 463 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 464 }; 465 466 namespace { 467 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 468 /// Values for bit flags for marking which requires clauses have been used. 469 enum OpenMPOffloadingRequiresDirFlags : int64_t { 470 /// flag undefined. 471 OMP_REQ_UNDEFINED = 0x000, 472 /// no requires clause present. 473 OMP_REQ_NONE = 0x001, 474 /// reverse_offload clause. 475 OMP_REQ_REVERSE_OFFLOAD = 0x002, 476 /// unified_address clause. 477 OMP_REQ_UNIFIED_ADDRESS = 0x004, 478 /// unified_shared_memory clause. 479 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 480 /// dynamic_allocators clause. 481 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 482 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 483 }; 484 485 enum OpenMPOffloadingReservedDeviceIDs { 486 /// Device ID if the device was not defined, runtime should get it 487 /// from environment variables in the spec. 488 OMP_DEVICEID_UNDEF = -1, 489 }; 490 } // anonymous namespace 491 492 /// Describes ident structure that describes a source location. 
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the runtime library entry points referenced by this file.
/// The comment on each enumerator gives the signature of the corresponding
/// runtime function.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
770 class CleanupTy final : public EHScopeStack::Cleanup { 771 PrePostActionTy *Action; 772 773 public: 774 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 775 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 776 if (!CGF.HaveInsertPoint()) 777 return; 778 Action->Exit(CGF); 779 } 780 }; 781 782 } // anonymous namespace 783 784 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 785 CodeGenFunction::RunCleanupsScope Scope(CGF); 786 if (PrePostAction) { 787 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 788 Callback(CodeGen, CGF, *PrePostAction); 789 } else { 790 PrePostActionTy Action; 791 Callback(CodeGen, CGF, Action); 792 } 793 } 794 795 /// Check if the combiner is a call to UDR combiner and if it is so return the 796 /// UDR decl used for reduction. 797 static const OMPDeclareReductionDecl * 798 getReductionInit(const Expr *ReductionOp) { 799 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 800 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 801 if (const auto *DRE = 802 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 803 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 804 return DRD; 805 return nullptr; 806 } 807 808 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 809 const OMPDeclareReductionDecl *DRD, 810 const Expr *InitOp, 811 Address Private, Address Original, 812 QualType Ty) { 813 if (DRD->getInitializer()) { 814 std::pair<llvm::Function *, llvm::Function *> Reduction = 815 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 816 const auto *CE = cast<CallExpr>(InitOp); 817 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 818 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 819 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 820 const auto *LHSDRE = 821 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 822 const auto *RHSDRE = 823 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 824 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 825 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 826 [=]() { return Private; }); 827 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 828 [=]() { return Original; }); 829 (void)PrivateScope.Privatize(); 830 RValue Func = RValue::get(Reduction.second); 831 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 832 CGF.EmitIgnoredExpr(InitOp); 833 } else { 834 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 835 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 836 auto *GV = new llvm::GlobalVariable( 837 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 838 llvm::GlobalValue::PrivateLinkage, Init, Name); 839 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 840 RValue InitRVal; 841 switch (CGF.getEvaluationKind(Ty)) { 842 case TEK_Scalar: 843 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 844 break; 845 case TEK_Complex: 846 InitRVal = 847 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 848 break; 849 case TEK_Aggregate: 850 InitRVal = RValue::getAggregate(LV.getAddress(CGF)); 851 break; 852 } 853 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 854 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 855 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 856 /*IsInitializer=*/false); 857 } 858 } 859 860 /// Emit initialization of arrays of complex types. 861 /// \param DestAddr Address of the array. 862 /// \param Type Type of array. 863 /// \param Init Initial expression of array. 864 /// \param SrcAddr Address of the original array. 865 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 866 QualType Type, bool EmitDeclareReductionInit, 867 const Expr *Init, 868 const OMPDeclareReductionDecl *DRD, 869 Address SrcAddr = Address::invalid()) { 870 // Perform element-by-element initialization. 
871 QualType ElementTy; 872 873 // Drill down to the base element type on both arrays. 874 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 875 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 876 DestAddr = 877 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 878 if (DRD) 879 SrcAddr = 880 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 881 882 llvm::Value *SrcBegin = nullptr; 883 if (DRD) 884 SrcBegin = SrcAddr.getPointer(); 885 llvm::Value *DestBegin = DestAddr.getPointer(); 886 // Cast from pointer to array type to pointer to single element. 887 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 888 // The basic structure here is a while-do loop. 889 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 890 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 891 llvm::Value *IsEmpty = 892 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 893 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 894 895 // Enter the loop body, making that address the current address. 
896 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 897 CGF.EmitBlock(BodyBB); 898 899 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 900 901 llvm::PHINode *SrcElementPHI = nullptr; 902 Address SrcElementCurrent = Address::invalid(); 903 if (DRD) { 904 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 905 "omp.arraycpy.srcElementPast"); 906 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 907 SrcElementCurrent = 908 Address(SrcElementPHI, 909 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 910 } 911 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 912 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 913 DestElementPHI->addIncoming(DestBegin, EntryBB); 914 Address DestElementCurrent = 915 Address(DestElementPHI, 916 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 917 918 // Emit copy. 919 { 920 CodeGenFunction::RunCleanupsScope InitScope(CGF); 921 if (EmitDeclareReductionInit) { 922 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 923 SrcElementCurrent, ElementTy); 924 } else 925 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 926 /*IsInitializer=*/false); 927 } 928 929 if (DRD) { 930 // Shift the address forward by one element. 931 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 932 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 933 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 934 } 935 936 // Shift the address forward by one element. 937 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 938 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 939 // Check whether we've reached the end. 940 llvm::Value *Done = 941 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 942 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 943 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 944 945 // Done. 
946 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 947 } 948 949 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 950 return CGF.EmitOMPSharedLValue(E); 951 } 952 953 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 954 const Expr *E) { 955 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 956 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 957 return LValue(); 958 } 959 960 void ReductionCodeGen::emitAggregateInitialization( 961 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 962 const OMPDeclareReductionDecl *DRD) { 963 // Emit VarDecl with copy init for arrays. 964 // Get the address of the original variable captured in current 965 // captured region. 966 const auto *PrivateVD = 967 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 968 bool EmitDeclareReductionInit = 969 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 970 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 971 EmitDeclareReductionInit, 972 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 973 : PrivateVD->getInit(), 974 DRD, SharedLVal.getAddress(CGF)); 975 } 976 977 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 978 ArrayRef<const Expr *> Privates, 979 ArrayRef<const Expr *> ReductionOps) { 980 ClausesData.reserve(Shareds.size()); 981 SharedAddresses.reserve(Shareds.size()); 982 Sizes.reserve(Shareds.size()); 983 BaseDecls.reserve(Shareds.size()); 984 auto IPriv = Privates.begin(); 985 auto IRed = ReductionOps.begin(); 986 for (const Expr *Ref : Shareds) { 987 ClausesData.emplace_back(Ref, *IPriv, *IRed); 988 std::advance(IPriv, 1); 989 std::advance(IRed, 1); 990 } 991 } 992 993 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 994 assert(SharedAddresses.size() == N && 995 "Number of generated lvalues must be exactly N."); 996 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 997 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 998 SharedAddresses.emplace_back(First, Second); 999 } 1000 1001 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 1002 const auto *PrivateVD = 1003 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1004 QualType PrivateType = PrivateVD->getType(); 1005 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1006 if (!PrivateType->isVariablyModifiedType()) { 1007 Sizes.emplace_back( 1008 CGF.getTypeSize( 1009 SharedAddresses[N].first.getType().getNonReferenceType()), 1010 nullptr); 1011 return; 1012 } 1013 llvm::Value *Size; 1014 llvm::Value *SizeInChars; 1015 auto *ElemType = cast<llvm::PointerType>( 1016 SharedAddresses[N].first.getPointer(CGF)->getType()) 1017 ->getElementType(); 1018 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1019 if (AsArraySection) { 1020 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF), 1021 SharedAddresses[N].first.getPointer(CGF)); 1022 Size = CGF.Builder.CreateNUWAdd( 1023 Size, 
llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 1024 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1025 } else { 1026 SizeInChars = CGF.getTypeSize( 1027 SharedAddresses[N].first.getType().getNonReferenceType()); 1028 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1029 } 1030 Sizes.emplace_back(SizeInChars, Size); 1031 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1032 CGF, 1033 cast<OpaqueValueExpr>( 1034 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1035 RValue::get(Size)); 1036 CGF.EmitVariablyModifiedType(PrivateType); 1037 } 1038 1039 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1040 llvm::Value *Size) { 1041 const auto *PrivateVD = 1042 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1043 QualType PrivateType = PrivateVD->getType(); 1044 if (!PrivateType->isVariablyModifiedType()) { 1045 assert(!Size && !Sizes[N].second && 1046 "Size should be nullptr for non-variably modified reduction " 1047 "items."); 1048 return; 1049 } 1050 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1051 CGF, 1052 cast<OpaqueValueExpr>( 1053 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1054 RValue::get(Size)); 1055 CGF.EmitVariablyModifiedType(PrivateType); 1056 } 1057 1058 void ReductionCodeGen::emitInitialization( 1059 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1060 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1061 assert(SharedAddresses.size() > N && "No variable was generated"); 1062 const auto *PrivateVD = 1063 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1064 const OMPDeclareReductionDecl *DRD = 1065 getReductionInit(ClausesData[N].ReductionOp); 1066 QualType PrivateType = PrivateVD->getType(); 1067 PrivateAddr = CGF.Builder.CreateElementBitCast( 1068 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1069 QualType SharedType = SharedAddresses[N].first.getType(); 1070 SharedLVal = 
CGF.MakeAddrLValue( 1071 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 1072 CGF.ConvertTypeForMem(SharedType)), 1073 SharedType, SharedAddresses[N].first.getBaseInfo(), 1074 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1075 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1076 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1077 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1078 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1079 PrivateAddr, SharedLVal.getAddress(CGF), 1080 SharedLVal.getType()); 1081 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1082 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1083 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1084 PrivateVD->getType().getQualifiers(), 1085 /*IsInitializer=*/false); 1086 } 1087 } 1088 1089 bool ReductionCodeGen::needCleanups(unsigned N) { 1090 const auto *PrivateVD = 1091 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1092 QualType PrivateType = PrivateVD->getType(); 1093 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1094 return DTorKind != QualType::DK_none; 1095 } 1096 1097 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1098 Address PrivateAddr) { 1099 const auto *PrivateVD = 1100 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1101 QualType PrivateType = PrivateVD->getType(); 1102 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1103 if (needCleanups(N)) { 1104 PrivateAddr = CGF.Builder.CreateElementBitCast( 1105 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1106 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1107 } 1108 } 1109 1110 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1111 LValue BaseLV) { 1112 BaseTy = BaseTy.getNonReferenceType(); 1113 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 
1114 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1115 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1116 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 1117 } else { 1118 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 1119 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1120 } 1121 BaseTy = BaseTy->getPointeeType(); 1122 } 1123 return CGF.MakeAddrLValue( 1124 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 1125 CGF.ConvertTypeForMem(ElTy)), 1126 BaseLV.getType(), BaseLV.getBaseInfo(), 1127 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1128 } 1129 1130 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1131 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1132 llvm::Value *Addr) { 1133 Address Tmp = Address::invalid(); 1134 Address TopTmp = Address::invalid(); 1135 Address MostTopTmp = Address::invalid(); 1136 BaseTy = BaseTy.getNonReferenceType(); 1137 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1138 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1139 Tmp = CGF.CreateMemTemp(BaseTy); 1140 if (TopTmp.isValid()) 1141 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1142 else 1143 MostTopTmp = Tmp; 1144 TopTmp = Tmp; 1145 BaseTy = BaseTy->getPointeeType(); 1146 } 1147 llvm::Type *Ty = BaseLVType; 1148 if (Tmp.isValid()) 1149 Ty = Tmp.getElementType(); 1150 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1151 if (Tmp.isValid()) { 1152 CGF.Builder.CreateStore(Addr, Tmp); 1153 return MostTopTmp; 1154 } 1155 return Address(Addr, BaseLVAlignment); 1156 } 1157 1158 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1159 const VarDecl *OrigVD = nullptr; 1160 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1161 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1162 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1163 Base = 
TempOASE->getBase()->IgnoreParenImpCasts(); 1164 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1165 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1166 DE = cast<DeclRefExpr>(Base); 1167 OrigVD = cast<VarDecl>(DE->getDecl()); 1168 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1169 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1170 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1171 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1172 DE = cast<DeclRefExpr>(Base); 1173 OrigVD = cast<VarDecl>(DE->getDecl()); 1174 } 1175 return OrigVD; 1176 } 1177 1178 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1179 Address PrivateAddr) { 1180 const DeclRefExpr *DE; 1181 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1182 BaseDecls.emplace_back(OrigVD); 1183 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1184 LValue BaseLValue = 1185 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1186 OriginalBaseLValue); 1187 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1188 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1189 llvm::Value *PrivatePointer = 1190 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1191 PrivateAddr.getPointer(), 1192 SharedAddresses[N].first.getAddress(CGF).getType()); 1193 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1194 return castToBase(CGF, OrigVD->getType(), 1195 SharedAddresses[N].first.getType(), 1196 OriginalBaseLValue.getAddress(CGF).getType(), 1197 OriginalBaseLValue.getAlignment(), Ptr); 1198 } 1199 BaseDecls.emplace_back( 1200 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1201 return PrivateAddr; 1202 } 1203 1204 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1205 const OMPDeclareReductionDecl *DRD = 1206 getReductionInit(ClausesData[N].ReductionOp); 1207 return DRD && DRD->getInitializer(); 1208 } 1209 1210 
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1211 return CGF.EmitLoadOfPointerLValue( 1212 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1213 getThreadIDVariable()->getType()->castAs<PointerType>()); 1214 } 1215 1216 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1217 if (!CGF.HaveInsertPoint()) 1218 return; 1219 // 1.2.2 OpenMP Language Terminology 1220 // Structured block - An executable statement with a single entry at the 1221 // top and a single exit at the bottom. 1222 // The point of exit cannot be a branch out of the structured block. 1223 // longjmp() and throw() must not violate the entry/exit criteria. 1224 CGF.EHStack.pushTerminate(); 1225 CodeGen(CGF); 1226 CGF.EHStack.popTerminate(); 1227 } 1228 1229 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1230 CodeGenFunction &CGF) { 1231 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1232 getThreadIDVariable()->getType(), 1233 AlignmentSource::Decl); 1234 } 1235 1236 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1237 QualType FieldTy) { 1238 auto *Field = FieldDecl::Create( 1239 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1240 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1241 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1242 Field->setAccess(AS_public); 1243 DC->addDecl(Field); 1244 return Field; 1245 } 1246 1247 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1248 StringRef Separator) 1249 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1250 OffloadEntriesInfoManager(CGM) { 1251 ASTContext &C = CGM.getContext(); 1252 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1253 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1254 RD->startDefinition(); 1255 // reserved_1 1256 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1257 // flags 1258 
addFieldToRecordDecl(C, RD, KmpInt32Ty); 1259 // reserved_2 1260 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1261 // reserved_3 1262 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1263 // psource 1264 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1265 RD->completeDefinition(); 1266 IdentQTy = C.getRecordType(RD); 1267 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1268 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1269 1270 loadOffloadInfoMetadata(); 1271 } 1272 1273 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, 1274 const GlobalDecl &OldGD, 1275 llvm::GlobalValue *OrigAddr, 1276 bool IsForDefinition) { 1277 // Emit at least a definition for the aliasee if the the address of the 1278 // original function is requested. 1279 if (IsForDefinition || OrigAddr) 1280 (void)CGM.GetAddrOfGlobal(NewGD); 1281 StringRef NewMangledName = CGM.getMangledName(NewGD); 1282 llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); 1283 if (Addr && !Addr->isDeclaration()) { 1284 const auto *D = cast<FunctionDecl>(OldGD.getDecl()); 1285 const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(NewGD); 1286 llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); 1287 1288 // Create a reference to the named value. This ensures that it is emitted 1289 // if a deferred decl. 1290 llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); 1291 1292 // Create the new alias itself, but don't set a name yet. 
1293 auto *GA = 1294 llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); 1295 1296 if (OrigAddr) { 1297 assert(OrigAddr->isDeclaration() && "Expected declaration"); 1298 1299 GA->takeName(OrigAddr); 1300 OrigAddr->replaceAllUsesWith( 1301 llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); 1302 OrigAddr->eraseFromParent(); 1303 } else { 1304 GA->setName(CGM.getMangledName(OldGD)); 1305 } 1306 1307 // Set attributes which are particular to an alias; this is a 1308 // specialization of the attributes which may be set on a global function. 1309 if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || 1310 D->isWeakImported()) 1311 GA->setLinkage(llvm::Function::WeakAnyLinkage); 1312 1313 CGM.SetCommonAttributes(OldGD, GA); 1314 return true; 1315 } 1316 return false; 1317 } 1318 1319 void CGOpenMPRuntime::clear() { 1320 InternalVars.clear(); 1321 // Clean non-target variable declarations possibly used only in debug info. 1322 for (const auto &Data : EmittedNonTargetVariables) { 1323 if (!Data.getValue().pointsToAliveValue()) 1324 continue; 1325 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1326 if (!GV) 1327 continue; 1328 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1329 continue; 1330 GV->eraseFromParent(); 1331 } 1332 // Emit aliases for the deferred aliasees. 1333 for (const auto &Pair : DeferredVariantFunction) { 1334 StringRef MangledName = CGM.getMangledName(Pair.second.second); 1335 llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); 1336 // If not able to emit alias, just emit original declaration. 
1337 (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, 1338 /*IsForDefinition=*/false); 1339 } 1340 } 1341 1342 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1343 SmallString<128> Buffer; 1344 llvm::raw_svector_ostream OS(Buffer); 1345 StringRef Sep = FirstSeparator; 1346 for (StringRef Part : Parts) { 1347 OS << Sep << Part; 1348 Sep = Separator; 1349 } 1350 return OS.str(); 1351 } 1352 1353 static llvm::Function * 1354 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1355 const Expr *CombinerInitializer, const VarDecl *In, 1356 const VarDecl *Out, bool IsCombiner) { 1357 // void .omp_combiner.(Ty *in, Ty *out); 1358 ASTContext &C = CGM.getContext(); 1359 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1360 FunctionArgList Args; 1361 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1362 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1363 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1364 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1365 Args.push_back(&OmpOutParm); 1366 Args.push_back(&OmpInParm); 1367 const CGFunctionInfo &FnInfo = 1368 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1369 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1370 std::string Name = CGM.getOpenMPRuntime().getName( 1371 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1372 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1373 Name, &CGM.getModule()); 1374 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1375 if (CGM.getLangOpts().Optimize) { 1376 Fn->removeFnAttr(llvm::Attribute::NoInline); 1377 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1378 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1379 } 1380 CodeGenFunction CGF(CGM); 1381 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1382 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
1383 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1384 Out->getLocation()); 1385 CodeGenFunction::OMPPrivateScope Scope(CGF); 1386 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1387 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1388 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1389 .getAddress(CGF); 1390 }); 1391 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1392 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1393 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1394 .getAddress(CGF); 1395 }); 1396 (void)Scope.Privatize(); 1397 if (!IsCombiner && Out->hasInit() && 1398 !CGF.isTrivialInitializer(Out->getInit())) { 1399 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1400 Out->getType().getQualifiers(), 1401 /*IsInitializer=*/true); 1402 } 1403 if (CombinerInitializer) 1404 CGF.EmitIgnoredExpr(CombinerInitializer); 1405 Scope.ForceCleanup(); 1406 CGF.FinishFunction(); 1407 return Fn; 1408 } 1409 1410 void CGOpenMPRuntime::emitUserDefinedReduction( 1411 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1412 if (UDRMap.count(D) > 0) 1413 return; 1414 llvm::Function *Combiner = emitCombinerOrInitializer( 1415 CGM, D->getType(), D->getCombiner(), 1416 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1417 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1418 /*IsCombiner=*/true); 1419 llvm::Function *Initializer = nullptr; 1420 if (const Expr *Init = D->getInitializer()) { 1421 Initializer = emitCombinerOrInitializer( 1422 CGM, D->getType(), 1423 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? 
Init 1424 : nullptr, 1425 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1426 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1427 /*IsCombiner=*/false); 1428 } 1429 UDRMap.try_emplace(D, Combiner, Initializer); 1430 if (CGF) { 1431 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1432 Decls.second.push_back(D); 1433 } 1434 } 1435 1436 std::pair<llvm::Function *, llvm::Function *> 1437 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1438 auto I = UDRMap.find(D); 1439 if (I != UDRMap.end()) 1440 return I->second; 1441 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1442 return UDRMap.lookup(D); 1443 } 1444 1445 // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR 1446 // Builder if one is present. 1447 struct PushAndPopStackRAII { 1448 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, 1449 bool HasCancel) 1450 : OMPBuilder(OMPBuilder) { 1451 if (!OMPBuilder) 1452 return; 1453 1454 // The following callback is the crucial part of clangs cleanup process. 1455 // 1456 // NOTE: 1457 // Once the OpenMPIRBuilder is used to create parallel regions (and 1458 // similar), the cancellation destination (Dest below) is determined via 1459 // IP. That means if we have variables to finalize we split the block at IP, 1460 // use the new block (=BB) as destination to build a JumpDest (via 1461 // getJumpDestInCurrentScope(BB)) which then is fed to 1462 // EmitBranchThroughCleanup. Furthermore, there will not be the need 1463 // to push & pop an FinalizationInfo object. 1464 // The FiniCB will still be needed but at the point where the 1465 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 
1466 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1467 assert(IP.getBlock()->end() == IP.getPoint() && 1468 "Clang CG should cause non-terminated block!"); 1469 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1470 CGF.Builder.restoreIP(IP); 1471 CodeGenFunction::JumpDest Dest = 1472 CGF.getOMPCancelDestination(OMPD_parallel); 1473 CGF.EmitBranchThroughCleanup(Dest); 1474 }; 1475 1476 // TODO: Remove this once we emit parallel regions through the 1477 // OpenMPIRBuilder as it can do this setup internally. 1478 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1479 {FiniCB, OMPD_parallel, HasCancel}); 1480 OMPBuilder->pushFinalizationCB(std::move(FI)); 1481 } 1482 ~PushAndPopStackRAII() { 1483 if (OMPBuilder) 1484 OMPBuilder->popFinalizationCB(); 1485 } 1486 llvm::OpenMPIRBuilder *OMPBuilder; 1487 }; 1488 1489 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1490 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1491 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1492 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1493 assert(ThreadIDVar->getType()->isPointerType() && 1494 "thread id variable must be of type kmp_int32 *"); 1495 CodeGenFunction CGF(CGM, true); 1496 bool HasCancel = false; 1497 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1498 HasCancel = OPD->hasCancel(); 1499 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1500 HasCancel = OPSD->hasCancel(); 1501 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1502 HasCancel = OPFD->hasCancel(); 1503 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1504 HasCancel = OPFD->hasCancel(); 1505 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1506 HasCancel = OPFD->hasCancel(); 1507 else if (const auto *OPFD = 1508 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1509 HasCancel = OPFD->hasCancel(); 1510 else if (const auto 
*OPFD = 1511 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1512 HasCancel = OPFD->hasCancel(); 1513 1514 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1515 // parallel region to make cancellation barriers work properly. 1516 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1517 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1518 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1519 HasCancel, OutlinedHelperName); 1520 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1521 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1522 } 1523 1524 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1525 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1526 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1527 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1528 return emitParallelOrTeamsOutlinedFunction( 1529 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1530 } 1531 1532 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1533 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1534 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1535 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1536 return emitParallelOrTeamsOutlinedFunction( 1537 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1538 } 1539 1540 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1541 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1542 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1543 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1544 bool Tied, unsigned &NumberOfParts) { 1545 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1546 PrePostActionTy &) { 1547 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1548 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1549 
llvm::Value *TaskArgs[] = { 1550 UpLoc, ThreadID, 1551 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1552 TaskTVar->getType()->castAs<PointerType>()) 1553 .getPointer(CGF)}; 1554 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1555 }; 1556 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1557 UntiedCodeGen); 1558 CodeGen.setAction(Action); 1559 assert(!ThreadIDVar->getType()->isPointerType() && 1560 "thread id variable must be of type kmp_int32 for tasks"); 1561 const OpenMPDirectiveKind Region = 1562 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1563 : OMPD_task; 1564 const CapturedStmt *CS = D.getCapturedStmt(Region); 1565 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1566 CodeGenFunction CGF(CGM, true); 1567 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1568 InnermostKind, 1569 TD ? TD->hasCancel() : false, Action); 1570 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1571 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1572 if (!Tied) 1573 NumberOfParts = Action.getNumberOfParts(); 1574 return Res; 1575 } 1576 1577 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1578 const RecordDecl *RD, const CGRecordLayout &RL, 1579 ArrayRef<llvm::Constant *> Data) { 1580 llvm::StructType *StructTy = RL.getLLVMType(); 1581 unsigned PrevIdx = 0; 1582 ConstantInitBuilder CIBuilder(CGM); 1583 auto DI = Data.begin(); 1584 for (const FieldDecl *FD : RD->fields()) { 1585 unsigned Idx = RL.getLLVMFieldNo(FD); 1586 // Fill the alignment. 1587 for (unsigned I = PrevIdx; I < Idx; ++I) 1588 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1589 PrevIdx = Idx + 1; 1590 Fields.add(*DI); 1591 ++DI; 1592 } 1593 } 1594 1595 template <class... 
As> 1596 static llvm::GlobalVariable * 1597 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1598 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1599 As &&... Args) { 1600 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1601 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1602 ConstantInitBuilder CIBuilder(CGM); 1603 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1604 buildStructValue(Fields, CGM, RD, RL, Data); 1605 return Fields.finishAndCreateGlobal( 1606 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1607 std::forward<As>(Args)...); 1608 } 1609 1610 template <typename T> 1611 static void 1612 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1613 ArrayRef<llvm::Constant *> Data, 1614 T &Parent) { 1615 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1616 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1617 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1618 buildStructValue(Fields, CGM, RD, RL, Data); 1619 Fields.finishAndAddTo(Parent); 1620 } 1621 1622 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1623 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1624 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1625 FlagsTy FlagsKey(Flags, Reserved2Flags); 1626 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1627 if (!Entry) { 1628 if (!DefaultOpenMPPSource) { 1629 // Initialize default location for psource field of ident_t structure of 1630 // all ident_t objects. Format is ";file;function;line;column;;". 
1631 // Taken from 1632 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1633 DefaultOpenMPPSource = 1634 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1635 DefaultOpenMPPSource = 1636 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1637 } 1638 1639 llvm::Constant *Data[] = { 1640 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1641 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1642 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1643 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1644 llvm::GlobalValue *DefaultOpenMPLocation = 1645 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1646 llvm::GlobalValue::PrivateLinkage); 1647 DefaultOpenMPLocation->setUnnamedAddr( 1648 llvm::GlobalValue::UnnamedAddr::Global); 1649 1650 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1651 } 1652 return Address(Entry, Align); 1653 } 1654 1655 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1656 bool AtCurrentPoint) { 1657 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1658 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1659 1660 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1661 if (AtCurrentPoint) { 1662 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1663 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1664 } else { 1665 Elem.second.ServiceInsertPt = 1666 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1667 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1668 } 1669 } 1670 1671 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1672 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1673 if (Elem.second.ServiceInsertPt) { 1674 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1675 Elem.second.ServiceInsertPt = nullptr; 1676 Ptr->eraseFromParent(); 1677 } 1678 } 1679 1680 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1681 SourceLocation Loc, 1682 unsigned Flags) { 1683 Flags |= OMP_IDENT_KMPC; 1684 // If no debug info is generated - return global default location. 1685 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1686 Loc.isInvalid()) 1687 return getOrCreateDefaultLocation(Flags).getPointer(); 1688 1689 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1690 1691 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1692 Address LocValue = Address::invalid(); 1693 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1694 if (I != OpenMPLocThreadIDMap.end()) 1695 LocValue = Address(I->second.DebugLoc, Align); 1696 1697 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1698 // GetOpenMPThreadID was called before this routine. 1699 if (!LocValue.isValid()) { 1700 // Generate "ident_t .kmpc_loc.addr;" 1701 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1702 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1703 Elem.second.DebugLoc = AI.getPointer(); 1704 LocValue = AI; 1705 1706 if (!Elem.second.ServiceInsertPt) 1707 setLocThreadIdInsertPt(CGF); 1708 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1709 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1710 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1711 CGF.getTypeSize(IdentQTy)); 1712 } 1713 1714 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1715 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1716 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1717 LValue PSource = 1718 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1719 1720 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1721 if (OMPDebugLoc == nullptr) { 1722 SmallString<128> Buffer2; 1723 llvm::raw_svector_ostream OS2(Buffer2); 1724 // Build debug location 1725 PresumedLoc PLoc = 
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1726 OS2 << ";" << PLoc.getFilename() << ";"; 1727 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1728 OS2 << FD->getQualifiedNameAsString(); 1729 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1730 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1731 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1732 } 1733 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1734 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1735 1736 // Our callers always pass this to a runtime function, so for 1737 // convenience, go ahead and return a naked pointer. 1738 return LocValue.getPointer(); 1739 } 1740 1741 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1742 SourceLocation Loc) { 1743 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1744 1745 llvm::Value *ThreadID = nullptr; 1746 // Check whether we've already cached a load of the thread id in this 1747 // function. 1748 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1749 if (I != OpenMPLocThreadIDMap.end()) { 1750 ThreadID = I->second.ThreadID; 1751 if (ThreadID != nullptr) 1752 return ThreadID; 1753 } 1754 // If exceptions are enabled, do not use parameter to avoid possible crash. 1755 if (auto *OMPRegionInfo = 1756 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1757 if (OMPRegionInfo->getThreadIDVariable()) { 1758 // Check if this an outlined function with thread id passed as argument. 
1759 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1760 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1761 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1762 !CGF.getLangOpts().CXXExceptions || 1763 CGF.Builder.GetInsertBlock() == TopBlock || 1764 !isa<llvm::Instruction>(LVal.getPointer(CGF)) || 1765 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1766 TopBlock || 1767 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == 1768 CGF.Builder.GetInsertBlock()) { 1769 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1770 // If value loaded in entry block, cache it and use it everywhere in 1771 // function. 1772 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1773 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1774 Elem.second.ThreadID = ThreadID; 1775 } 1776 return ThreadID; 1777 } 1778 } 1779 } 1780 1781 // This is not an outlined function region - need to call __kmpc_int32 1782 // kmpc_global_thread_num(ident_t *loc). 1783 // Generate thread id value and cache this value for use across the 1784 // function. 
1785 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1786 if (!Elem.second.ServiceInsertPt) 1787 setLocThreadIdInsertPt(CGF); 1788 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1789 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1790 llvm::CallInst *Call = CGF.Builder.CreateCall( 1791 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1792 emitUpdateLocation(CGF, Loc)); 1793 Call->setCallingConv(CGF.getRuntimeCC()); 1794 Elem.second.ThreadID = Call; 1795 return Call; 1796 } 1797 1798 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1799 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1800 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1801 clearLocThreadIdInsertPt(CGF); 1802 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1803 } 1804 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1805 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1806 UDRMap.erase(D); 1807 FunctionUDRMap.erase(CGF.CurFn); 1808 } 1809 auto I = FunctionUDMMap.find(CGF.CurFn); 1810 if (I != FunctionUDMMap.end()) { 1811 for(auto *D : I->second) 1812 UDMMap.erase(D); 1813 FunctionUDMMap.erase(I); 1814 } 1815 } 1816 1817 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1818 return IdentTy->getPointerTo(); 1819 } 1820 1821 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1822 if (!Kmpc_MicroTy) { 1823 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1824 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1825 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1826 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1827 } 1828 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1829 } 1830 1831 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1832 llvm::FunctionCallee RTLFn = nullptr; 1833 switch (static_cast<OpenMPRTLFunction>(Function)) { 1834 case OMPRTL__kmpc_fork_call: { 1835 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1836 // microtask, ...); 1837 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1838 getKmpc_MicroPointerTy()}; 1839 auto *FnTy = 1840 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1841 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1842 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1843 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1844 llvm::LLVMContext &Ctx = F->getContext(); 1845 llvm::MDBuilder MDB(Ctx); 1846 // Annotate the callback behavior of the __kmpc_fork_call: 1847 // - The callback callee is argument number 2 (microtask). 1848 // - The first two arguments of the callback callee are unknown (-1). 1849 // - All variadic arguments to the __kmpc_fork_call are passed to the 1850 // callback callee. 
1851 F->addMetadata( 1852 llvm::LLVMContext::MD_callback, 1853 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1854 2, {-1, -1}, 1855 /* VarArgsArePassed */ true)})); 1856 } 1857 } 1858 break; 1859 } 1860 case OMPRTL__kmpc_global_thread_num: { 1861 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1862 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1863 auto *FnTy = 1864 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1865 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1866 break; 1867 } 1868 case OMPRTL__kmpc_threadprivate_cached: { 1869 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1870 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1871 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1872 CGM.VoidPtrTy, CGM.SizeTy, 1873 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1874 auto *FnTy = 1875 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1876 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1877 break; 1878 } 1879 case OMPRTL__kmpc_critical: { 1880 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1881 // kmp_critical_name *crit); 1882 llvm::Type *TypeParams[] = { 1883 getIdentTyPointerTy(), CGM.Int32Ty, 1884 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1885 auto *FnTy = 1886 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1887 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1888 break; 1889 } 1890 case OMPRTL__kmpc_critical_with_hint: { 1891 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1892 // kmp_critical_name *crit, uintptr_t hint); 1893 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1894 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1895 CGM.IntPtrTy}; 1896 auto *FnTy = 1897 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1898 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1899 break; 1900 } 1901 case OMPRTL__kmpc_threadprivate_register: { 1902 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1903 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1904 // typedef void *(*kmpc_ctor)(void *); 1905 auto *KmpcCtorTy = 1906 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1907 /*isVarArg*/ false)->getPointerTo(); 1908 // typedef void *(*kmpc_cctor)(void *, void *); 1909 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1910 auto *KmpcCopyCtorTy = 1911 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1912 /*isVarArg*/ false) 1913 ->getPointerTo(); 1914 // typedef void (*kmpc_dtor)(void *); 1915 auto *KmpcDtorTy = 1916 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1917 ->getPointerTo(); 1918 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1919 KmpcCopyCtorTy, KmpcDtorTy}; 1920 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1921 /*isVarArg*/ false); 1922 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1923 break; 1924 } 1925 case OMPRTL__kmpc_end_critical: { 1926 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1927 // kmp_critical_name *crit); 1928 llvm::Type *TypeParams[] = { 1929 getIdentTyPointerTy(), CGM.Int32Ty, 1930 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1931 auto *FnTy = 1932 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1933 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1934 break; 1935 } 1936 case OMPRTL__kmpc_cancel_barrier: { 1937 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1938 // global_tid); 1939 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1940 auto *FnTy = 1941 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1942 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1943 break; 1944 } 1945 case 
OMPRTL__kmpc_barrier: { 1946 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1947 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1948 auto *FnTy = 1949 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1950 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1951 break; 1952 } 1953 case OMPRTL__kmpc_for_static_fini: { 1954 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1955 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1956 auto *FnTy = 1957 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1958 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1959 break; 1960 } 1961 case OMPRTL__kmpc_push_num_threads: { 1962 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1963 // kmp_int32 num_threads) 1964 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1965 CGM.Int32Ty}; 1966 auto *FnTy = 1967 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1968 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1969 break; 1970 } 1971 case OMPRTL__kmpc_serialized_parallel: { 1972 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1973 // global_tid); 1974 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1975 auto *FnTy = 1976 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1977 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1978 break; 1979 } 1980 case OMPRTL__kmpc_end_serialized_parallel: { 1981 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1982 // global_tid); 1983 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1984 auto *FnTy = 1985 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1986 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1987 break; 1988 } 1989 case OMPRTL__kmpc_flush: { 1990 // Build void 
__kmpc_flush(ident_t *loc); 1991 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1992 auto *FnTy = 1993 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1994 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1995 break; 1996 } 1997 case OMPRTL__kmpc_master: { 1998 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1999 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2000 auto *FnTy = 2001 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2002 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 2003 break; 2004 } 2005 case OMPRTL__kmpc_end_master: { 2006 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 2007 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2008 auto *FnTy = 2009 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2010 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 2011 break; 2012 } 2013 case OMPRTL__kmpc_omp_taskyield: { 2014 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 2015 // int end_part); 2016 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2017 auto *FnTy = 2018 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2019 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 2020 break; 2021 } 2022 case OMPRTL__kmpc_single: { 2023 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 2024 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2025 auto *FnTy = 2026 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2027 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 2028 break; 2029 } 2030 case OMPRTL__kmpc_end_single: { 2031 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 2032 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2033 auto *FnTy = 2034 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2035 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 2036 break; 2037 } 2038 case OMPRTL__kmpc_omp_task_alloc: { 2039 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2040 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2041 // kmp_routine_entry_t *task_entry); 2042 assert(KmpRoutineEntryPtrTy != nullptr && 2043 "Type kmp_routine_entry_t must be created."); 2044 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2045 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2046 // Return void * and then cast to particular kmp_task_t type. 2047 auto *FnTy = 2048 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2049 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2050 break; 2051 } 2052 case OMPRTL__kmpc_omp_target_task_alloc: { 2053 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2054 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2055 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2056 assert(KmpRoutineEntryPtrTy != nullptr && 2057 "Type kmp_routine_entry_t must be created."); 2058 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2059 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2060 CGM.Int64Ty}; 2061 // Return void * and then cast to particular kmp_task_t type. 
2062 auto *FnTy = 2063 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2064 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2065 break; 2066 } 2067 case OMPRTL__kmpc_omp_task: { 2068 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2069 // *new_task); 2070 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2071 CGM.VoidPtrTy}; 2072 auto *FnTy = 2073 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2074 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2075 break; 2076 } 2077 case OMPRTL__kmpc_copyprivate: { 2078 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2079 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2080 // kmp_int32 didit); 2081 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2082 auto *CpyFnTy = 2083 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2084 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2085 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2086 CGM.Int32Ty}; 2087 auto *FnTy = 2088 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2089 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2090 break; 2091 } 2092 case OMPRTL__kmpc_reduce: { 2093 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2094 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2095 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2096 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2097 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2098 /*isVarArg=*/false); 2099 llvm::Type *TypeParams[] = { 2100 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2101 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2102 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2103 auto *FnTy = 2104 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2105 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2106 break; 2107 } 2108 case OMPRTL__kmpc_reduce_nowait: { 2109 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2110 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2111 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2112 // *lck); 2113 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2114 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2115 /*isVarArg=*/false); 2116 llvm::Type *TypeParams[] = { 2117 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2118 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2119 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2120 auto *FnTy = 2121 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2122 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2123 break; 2124 } 2125 case OMPRTL__kmpc_end_reduce: { 2126 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2127 // kmp_critical_name *lck); 2128 llvm::Type *TypeParams[] = { 2129 getIdentTyPointerTy(), CGM.Int32Ty, 2130 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2131 auto *FnTy = 2132 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2133 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2134 break; 2135 } 2136 case OMPRTL__kmpc_end_reduce_nowait: { 2137 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2138 // kmp_critical_name *lck); 2139 llvm::Type *TypeParams[] = { 2140 getIdentTyPointerTy(), CGM.Int32Ty, 2141 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2142 auto *FnTy = 2143 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2144 RTLFn = 2145 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2146 break; 2147 } 2148 case OMPRTL__kmpc_omp_task_begin_if0: { 
2149 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2150 // *new_task); 2151 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2152 CGM.VoidPtrTy}; 2153 auto *FnTy = 2154 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2155 RTLFn = 2156 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2157 break; 2158 } 2159 case OMPRTL__kmpc_omp_task_complete_if0: { 2160 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2161 // *new_task); 2162 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2163 CGM.VoidPtrTy}; 2164 auto *FnTy = 2165 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2166 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2167 /*Name=*/"__kmpc_omp_task_complete_if0"); 2168 break; 2169 } 2170 case OMPRTL__kmpc_ordered: { 2171 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2172 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2173 auto *FnTy = 2174 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2175 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2176 break; 2177 } 2178 case OMPRTL__kmpc_end_ordered: { 2179 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2180 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2181 auto *FnTy = 2182 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2183 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2184 break; 2185 } 2186 case OMPRTL__kmpc_omp_taskwait: { 2187 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2188 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2189 auto *FnTy = 2190 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2191 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2192 break; 2193 } 2194 case OMPRTL__kmpc_taskgroup: { 2195 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2196 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2197 auto *FnTy = 2198 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2199 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2200 break; 2201 } 2202 case OMPRTL__kmpc_end_taskgroup: { 2203 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2204 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2205 auto *FnTy = 2206 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2207 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2208 break; 2209 } 2210 case OMPRTL__kmpc_push_proc_bind: { 2211 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2212 // int proc_bind) 2213 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2214 auto *FnTy = 2215 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2216 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2217 break; 2218 } 2219 case OMPRTL__kmpc_omp_task_with_deps: { 2220 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2221 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2222 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2223 llvm::Type *TypeParams[] = { 2224 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2225 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2226 auto *FnTy = 2227 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2228 RTLFn = 2229 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2230 break; 2231 } 2232 case OMPRTL__kmpc_omp_wait_deps: { 2233 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2234 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2235 // kmp_depend_info_t *noalias_dep_list); 2236 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2237 CGM.Int32Ty, CGM.VoidPtrTy, 2238 
CGM.Int32Ty, CGM.VoidPtrTy}; 2239 auto *FnTy = 2240 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2241 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2242 break; 2243 } 2244 case OMPRTL__kmpc_cancellationpoint: { 2245 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2246 // global_tid, kmp_int32 cncl_kind) 2247 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2248 auto *FnTy = 2249 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2250 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2251 break; 2252 } 2253 case OMPRTL__kmpc_cancel: { 2254 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2255 // kmp_int32 cncl_kind) 2256 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2257 auto *FnTy = 2258 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2259 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2260 break; 2261 } 2262 case OMPRTL__kmpc_push_num_teams: { 2263 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2264 // kmp_int32 num_teams, kmp_int32 num_threads) 2265 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2266 CGM.Int32Ty}; 2267 auto *FnTy = 2268 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2269 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2270 break; 2271 } 2272 case OMPRTL__kmpc_fork_teams: { 2273 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2274 // microtask, ...); 2275 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2276 getKmpc_MicroPointerTy()}; 2277 auto *FnTy = 2278 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2279 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2280 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2281 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2282 llvm::LLVMContext &Ctx = F->getContext(); 2283 llvm::MDBuilder MDB(Ctx); 2284 // Annotate the callback behavior of the __kmpc_fork_teams: 2285 // - The callback callee is argument number 2 (microtask). 2286 // - The first two arguments of the callback callee are unknown (-1). 2287 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2288 // callback callee. 2289 F->addMetadata( 2290 llvm::LLVMContext::MD_callback, 2291 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2292 2, {-1, -1}, 2293 /* VarArgsArePassed */ true)})); 2294 } 2295 } 2296 break; 2297 } 2298 case OMPRTL__kmpc_taskloop: { 2299 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2300 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2301 // sched, kmp_uint64 grainsize, void *task_dup); 2302 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2303 CGM.IntTy, 2304 CGM.VoidPtrTy, 2305 CGM.IntTy, 2306 CGM.Int64Ty->getPointerTo(), 2307 CGM.Int64Ty->getPointerTo(), 2308 CGM.Int64Ty, 2309 CGM.IntTy, 2310 CGM.IntTy, 2311 CGM.Int64Ty, 2312 CGM.VoidPtrTy}; 2313 auto *FnTy = 2314 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2315 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2316 break; 2317 } 2318 case OMPRTL__kmpc_doacross_init: { 2319 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2320 // num_dims, struct kmp_dim *dims); 2321 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2322 CGM.Int32Ty, 2323 CGM.Int32Ty, 2324 CGM.VoidPtrTy}; 2325 auto *FnTy = 2326 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2327 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2328 break; 2329 } 2330 case OMPRTL__kmpc_doacross_fini: { 2331 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2332 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2333 auto *FnTy 
= 2334 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2335 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2336 break; 2337 } 2338 case OMPRTL__kmpc_doacross_post: { 2339 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2340 // *vec); 2341 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2342 CGM.Int64Ty->getPointerTo()}; 2343 auto *FnTy = 2344 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2345 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2346 break; 2347 } 2348 case OMPRTL__kmpc_doacross_wait: { 2349 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2350 // *vec); 2351 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2352 CGM.Int64Ty->getPointerTo()}; 2353 auto *FnTy = 2354 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2355 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2356 break; 2357 } 2358 case OMPRTL__kmpc_task_reduction_init: { 2359 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2360 // *data); 2361 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2362 auto *FnTy = 2363 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2364 RTLFn = 2365 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2366 break; 2367 } 2368 case OMPRTL__kmpc_task_reduction_get_th_data: { 2369 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2370 // *d); 2371 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2372 auto *FnTy = 2373 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2374 RTLFn = CGM.CreateRuntimeFunction( 2375 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2376 break; 2377 } 2378 case OMPRTL__kmpc_alloc: { 2379 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2380 // 
al); omp_allocator_handle_t type is void *. 2381 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2382 auto *FnTy = 2383 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2384 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2385 break; 2386 } 2387 case OMPRTL__kmpc_free: { 2388 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2389 // al); omp_allocator_handle_t type is void *. 2390 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2391 auto *FnTy = 2392 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2393 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2394 break; 2395 } 2396 case OMPRTL__kmpc_push_target_tripcount: { 2397 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2398 // size); 2399 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2400 llvm::FunctionType *FnTy = 2401 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2402 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2403 break; 2404 } 2405 case OMPRTL__tgt_target: { 2406 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2407 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2408 // *arg_types); 2409 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2410 CGM.VoidPtrTy, 2411 CGM.Int32Ty, 2412 CGM.VoidPtrPtrTy, 2413 CGM.VoidPtrPtrTy, 2414 CGM.Int64Ty->getPointerTo(), 2415 CGM.Int64Ty->getPointerTo()}; 2416 auto *FnTy = 2417 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2418 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2419 break; 2420 } 2421 case OMPRTL__tgt_target_nowait: { 2422 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2423 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2424 // int64_t *arg_types); 2425 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2426 CGM.VoidPtrTy, 2427 
CGM.Int32Ty, 2428 CGM.VoidPtrPtrTy, 2429 CGM.VoidPtrPtrTy, 2430 CGM.Int64Ty->getPointerTo(), 2431 CGM.Int64Ty->getPointerTo()}; 2432 auto *FnTy = 2433 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2434 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2435 break; 2436 } 2437 case OMPRTL__tgt_target_teams: { 2438 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2439 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2440 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2441 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2442 CGM.VoidPtrTy, 2443 CGM.Int32Ty, 2444 CGM.VoidPtrPtrTy, 2445 CGM.VoidPtrPtrTy, 2446 CGM.Int64Ty->getPointerTo(), 2447 CGM.Int64Ty->getPointerTo(), 2448 CGM.Int32Ty, 2449 CGM.Int32Ty}; 2450 auto *FnTy = 2451 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2452 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2453 break; 2454 } 2455 case OMPRTL__tgt_target_teams_nowait: { 2456 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2457 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2458 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2459 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2460 CGM.VoidPtrTy, 2461 CGM.Int32Ty, 2462 CGM.VoidPtrPtrTy, 2463 CGM.VoidPtrPtrTy, 2464 CGM.Int64Ty->getPointerTo(), 2465 CGM.Int64Ty->getPointerTo(), 2466 CGM.Int32Ty, 2467 CGM.Int32Ty}; 2468 auto *FnTy = 2469 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2470 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2471 break; 2472 } 2473 case OMPRTL__tgt_register_requires: { 2474 // Build void __tgt_register_requires(int64_t flags); 2475 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2476 auto *FnTy = 2477 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2478 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2479 
break; 2480 } 2481 case OMPRTL__tgt_register_lib: { 2482 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2483 QualType ParamTy = 2484 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2485 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2486 auto *FnTy = 2487 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2488 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2489 break; 2490 } 2491 case OMPRTL__tgt_unregister_lib: { 2492 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2493 QualType ParamTy = 2494 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2495 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2496 auto *FnTy = 2497 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2498 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2499 break; 2500 } 2501 case OMPRTL__tgt_target_data_begin: { 2502 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2503 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2504 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2505 CGM.Int32Ty, 2506 CGM.VoidPtrPtrTy, 2507 CGM.VoidPtrPtrTy, 2508 CGM.Int64Ty->getPointerTo(), 2509 CGM.Int64Ty->getPointerTo()}; 2510 auto *FnTy = 2511 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2512 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2513 break; 2514 } 2515 case OMPRTL__tgt_target_data_begin_nowait: { 2516 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2517 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2518 // *arg_types); 2519 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2520 CGM.Int32Ty, 2521 CGM.VoidPtrPtrTy, 2522 CGM.VoidPtrPtrTy, 2523 CGM.Int64Ty->getPointerTo(), 2524 CGM.Int64Ty->getPointerTo()}; 2525 auto *FnTy = 2526 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2527 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2528 break; 2529 } 2530 case OMPRTL__tgt_target_data_end: { 2531 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2532 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2533 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2534 CGM.Int32Ty, 2535 CGM.VoidPtrPtrTy, 2536 CGM.VoidPtrPtrTy, 2537 CGM.Int64Ty->getPointerTo(), 2538 CGM.Int64Ty->getPointerTo()}; 2539 auto *FnTy = 2540 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2541 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2542 break; 2543 } 2544 case OMPRTL__tgt_target_data_end_nowait: { 2545 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2546 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2547 // *arg_types); 2548 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2549 CGM.Int32Ty, 2550 CGM.VoidPtrPtrTy, 2551 CGM.VoidPtrPtrTy, 2552 CGM.Int64Ty->getPointerTo(), 2553 CGM.Int64Ty->getPointerTo()}; 2554 auto *FnTy = 2555 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2556 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2557 break; 2558 } 2559 case OMPRTL__tgt_target_data_update: { 2560 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2561 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2562 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2563 CGM.Int32Ty, 2564 CGM.VoidPtrPtrTy, 2565 CGM.VoidPtrPtrTy, 2566 CGM.Int64Ty->getPointerTo(), 2567 CGM.Int64Ty->getPointerTo()}; 2568 auto *FnTy = 2569 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2570 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2571 break; 2572 } 2573 case OMPRTL__tgt_target_data_update_nowait: { 2574 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2575 // arg_num, void** args_base, void **args, int64_t *arg_sizes, 
int64_t 2576 // *arg_types); 2577 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2578 CGM.Int32Ty, 2579 CGM.VoidPtrPtrTy, 2580 CGM.VoidPtrPtrTy, 2581 CGM.Int64Ty->getPointerTo(), 2582 CGM.Int64Ty->getPointerTo()}; 2583 auto *FnTy = 2584 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2585 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2586 break; 2587 } 2588 case OMPRTL__tgt_mapper_num_components: { 2589 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2590 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2591 auto *FnTy = 2592 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2593 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2594 break; 2595 } 2596 case OMPRTL__tgt_push_mapper_component: { 2597 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2598 // *base, void *begin, int64_t size, int64_t type); 2599 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2600 CGM.Int64Ty, CGM.Int64Ty}; 2601 auto *FnTy = 2602 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2603 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2604 break; 2605 } 2606 } 2607 assert(RTLFn && "Unable to find OpenMP runtime function"); 2608 return RTLFn; 2609 } 2610 2611 llvm::FunctionCallee 2612 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2613 assert((IVSize == 32 || IVSize == 64) && 2614 "IV size is not compatible with the omp runtime"); 2615 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2616 : "__kmpc_for_static_init_4u") 2617 : (IVSigned ? "__kmpc_for_static_init_8" 2618 : "__kmpc_for_static_init_8u"); 2619 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2620 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2621 llvm::Type *TypeParams[] = { 2622 getIdentTyPointerTy(), // loc 2623 CGM.Int32Ty, // tid 2624 CGM.Int32Ty, // schedtype 2625 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2626 PtrTy, // p_lower 2627 PtrTy, // p_upper 2628 PtrTy, // p_stride 2629 ITy, // incr 2630 ITy // chunk 2631 }; 2632 auto *FnTy = 2633 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2634 return CGM.CreateRuntimeFunction(FnTy, Name); 2635 } 2636 2637 llvm::FunctionCallee 2638 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2639 assert((IVSize == 32 || IVSize == 64) && 2640 "IV size is not compatible with the omp runtime"); 2641 StringRef Name = 2642 IVSize == 32 2643 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2644 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2645 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2646 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2647 CGM.Int32Ty, // tid 2648 CGM.Int32Ty, // schedtype 2649 ITy, // lower 2650 ITy, // upper 2651 ITy, // stride 2652 ITy // chunk 2653 }; 2654 auto *FnTy = 2655 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2656 return CGM.CreateRuntimeFunction(FnTy, Name); 2657 } 2658 2659 llvm::FunctionCallee 2660 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2661 assert((IVSize == 32 || IVSize == 64) && 2662 "IV size is not compatible with the omp runtime"); 2663 StringRef Name = 2664 IVSize == 32 2665 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2666 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2667 llvm::Type *TypeParams[] = { 2668 getIdentTyPointerTy(), // loc 2669 CGM.Int32Ty, // tid 2670 }; 2671 auto *FnTy = 2672 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2673 return CGM.CreateRuntimeFunction(FnTy, Name); 2674 } 2675 2676 llvm::FunctionCallee 2677 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2678 assert((IVSize == 32 || IVSize == 64) && 2679 "IV size is not compatible with the omp runtime"); 2680 StringRef Name = 2681 IVSize == 32 2682 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2683 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2684 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2685 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2686 llvm::Type *TypeParams[] = { 2687 getIdentTyPointerTy(), // loc 2688 CGM.Int32Ty, // tid 2689 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2690 PtrTy, // p_lower 2691 PtrTy, // p_upper 2692 PtrTy // p_stride 2693 }; 2694 auto *FnTy = 2695 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2696 return CGM.CreateRuntimeFunction(FnTy, Name); 2697 } 2698 2699 /// Obtain information that uniquely identifies a target entry. This 2700 /// consists of the file and device IDs as well as line number associated with 2701 /// the relevant entry source location. 
2702 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2703 unsigned &DeviceID, unsigned &FileID, 2704 unsigned &LineNum) { 2705 SourceManager &SM = C.getSourceManager(); 2706 2707 // The loc should be always valid and have a file ID (the user cannot use 2708 // #pragma directives in macros) 2709 2710 assert(Loc.isValid() && "Source location is expected to be always valid."); 2711 2712 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2713 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2714 2715 llvm::sys::fs::UniqueID ID; 2716 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2717 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2718 << PLoc.getFilename() << EC.message(); 2719 2720 DeviceID = ID.getDevice(); 2721 FileID = ID.getFile(); 2722 LineNum = PLoc.getLine(); 2723 } 2724 2725 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2726 if (CGM.getLangOpts().OpenMPSimd) 2727 return Address::invalid(); 2728 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2729 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2730 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2731 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2732 HasRequiresUnifiedSharedMemory))) { 2733 SmallString<64> PtrName; 2734 { 2735 llvm::raw_svector_ostream OS(PtrName); 2736 OS << CGM.getMangledName(GlobalDecl(VD)); 2737 if (!VD->isExternallyVisible()) { 2738 unsigned DeviceID, FileID, Line; 2739 getTargetEntryUniqueInfo(CGM.getContext(), 2740 VD->getCanonicalDecl()->getBeginLoc(), 2741 DeviceID, FileID, Line); 2742 OS << llvm::format("_%x", FileID); 2743 } 2744 OS << "_decl_tgt_ref_ptr"; 2745 } 2746 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2747 if (!Ptr) { 2748 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2749 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2750 PtrName); 2751 2752 auto *GV = cast<llvm::GlobalVariable>(Ptr); 
2753 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2754 2755 if (!CGM.getLangOpts().OpenMPIsDevice) 2756 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2757 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2758 } 2759 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2760 } 2761 return Address::invalid(); 2762 } 2763 2764 llvm::Constant * 2765 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2766 assert(!CGM.getLangOpts().OpenMPUseTLS || 2767 !CGM.getContext().getTargetInfo().isTLSSupported()); 2768 // Lookup the entry, lazily creating it if necessary. 2769 std::string Suffix = getName({"cache", ""}); 2770 return getOrCreateInternalVariable( 2771 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2772 } 2773 2774 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2775 const VarDecl *VD, 2776 Address VDAddr, 2777 SourceLocation Loc) { 2778 if (CGM.getLangOpts().OpenMPUseTLS && 2779 CGM.getContext().getTargetInfo().isTLSSupported()) 2780 return VDAddr; 2781 2782 llvm::Type *VarTy = VDAddr.getElementType(); 2783 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2784 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2785 CGM.Int8PtrTy), 2786 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2787 getOrCreateThreadPrivateCache(VD)}; 2788 return Address(CGF.EmitRuntimeCall( 2789 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2790 VDAddr.getAlignment()); 2791 } 2792 2793 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2794 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2795 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2796 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2797 // library. 
2798 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2799 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2800 OMPLoc); 2801 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2802 // to register constructor/destructor for variable. 2803 llvm::Value *Args[] = { 2804 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2805 Ctor, CopyCtor, Dtor}; 2806 CGF.EmitRuntimeCall( 2807 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2808 } 2809 2810 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2811 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2812 bool PerformInit, CodeGenFunction *CGF) { 2813 if (CGM.getLangOpts().OpenMPUseTLS && 2814 CGM.getContext().getTargetInfo().isTLSSupported()) 2815 return nullptr; 2816 2817 VD = VD->getDefinition(CGM.getContext()); 2818 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2819 QualType ASTTy = VD->getType(); 2820 2821 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2822 const Expr *Init = VD->getAnyInitializer(); 2823 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2824 // Generate function that re-emits the declaration's initializer into the 2825 // threadprivate copy of the variable VD 2826 CodeGenFunction CtorCGF(CGM); 2827 FunctionArgList Args; 2828 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2829 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2830 ImplicitParamDecl::Other); 2831 Args.push_back(&Dst); 2832 2833 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2834 CGM.getContext().VoidPtrTy, Args); 2835 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2836 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2837 llvm::Function *Fn = 2838 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2839 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2840 Args, Loc, Loc); 2841 
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2842 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2843 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2844 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2845 Arg = CtorCGF.Builder.CreateElementBitCast( 2846 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2847 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2848 /*IsInitializer=*/true); 2849 ArgVal = CtorCGF.EmitLoadOfScalar( 2850 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2851 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2852 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2853 CtorCGF.FinishFunction(); 2854 Ctor = Fn; 2855 } 2856 if (VD->getType().isDestructedType() != QualType::DK_none) { 2857 // Generate function that emits destructor call for the threadprivate copy 2858 // of the variable VD 2859 CodeGenFunction DtorCGF(CGM); 2860 FunctionArgList Args; 2861 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2862 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2863 ImplicitParamDecl::Other); 2864 Args.push_back(&Dst); 2865 2866 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2867 CGM.getContext().VoidTy, Args); 2868 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2869 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2870 llvm::Function *Fn = 2871 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2872 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2873 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2874 Loc, Loc); 2875 // Create a scope with an artificial location for the body of this function. 
2876 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2877 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2878 DtorCGF.GetAddrOfLocalVar(&Dst), 2879 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2880 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2881 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2882 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2883 DtorCGF.FinishFunction(); 2884 Dtor = Fn; 2885 } 2886 // Do not emit init function if it is not required. 2887 if (!Ctor && !Dtor) 2888 return nullptr; 2889 2890 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2891 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2892 /*isVarArg=*/false) 2893 ->getPointerTo(); 2894 // Copying constructor for the threadprivate variable. 2895 // Must be NULL - reserved by runtime, but currently it requires that this 2896 // parameter is always NULL. Otherwise it fires assertion. 2897 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2898 if (Ctor == nullptr) { 2899 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2900 /*isVarArg=*/false) 2901 ->getPointerTo(); 2902 Ctor = llvm::Constant::getNullValue(CtorTy); 2903 } 2904 if (Dtor == nullptr) { 2905 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2906 /*isVarArg=*/false) 2907 ->getPointerTo(); 2908 Dtor = llvm::Constant::getNullValue(DtorTy); 2909 } 2910 if (!CGF) { 2911 auto *InitFunctionTy = 2912 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2913 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2914 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2915 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2916 CodeGenFunction InitCGF(CGM); 2917 FunctionArgList ArgList; 2918 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2919 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2920 Loc, Loc); 2921 
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2922 InitCGF.FinishFunction(); 2923 return InitFunction; 2924 } 2925 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2926 } 2927 return nullptr; 2928 } 2929 2930 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2931 llvm::GlobalVariable *Addr, 2932 bool PerformInit) { 2933 if (CGM.getLangOpts().OMPTargetTriples.empty() && 2934 !CGM.getLangOpts().OpenMPIsDevice) 2935 return false; 2936 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2937 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2938 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2939 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2940 HasRequiresUnifiedSharedMemory)) 2941 return CGM.getLangOpts().OpenMPIsDevice; 2942 VD = VD->getDefinition(CGM.getContext()); 2943 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2944 return CGM.getLangOpts().OpenMPIsDevice; 2945 2946 QualType ASTTy = VD->getType(); 2947 2948 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2949 // Produce the unique prefix to identify the new target regions. We use 2950 // the source location of the variable declaration which we know to not 2951 // conflict with any target region. 
2952 unsigned DeviceID; 2953 unsigned FileID; 2954 unsigned Line; 2955 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2956 SmallString<128> Buffer, Out; 2957 { 2958 llvm::raw_svector_ostream OS(Buffer); 2959 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2960 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2961 } 2962 2963 const Expr *Init = VD->getAnyInitializer(); 2964 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2965 llvm::Constant *Ctor; 2966 llvm::Constant *ID; 2967 if (CGM.getLangOpts().OpenMPIsDevice) { 2968 // Generate function that re-emits the declaration's initializer into 2969 // the threadprivate copy of the variable VD 2970 CodeGenFunction CtorCGF(CGM); 2971 2972 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2973 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2974 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2975 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2976 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2977 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2978 FunctionArgList(), Loc, Loc); 2979 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2980 CtorCGF.EmitAnyExprToMem(Init, 2981 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2982 Init->getType().getQualifiers(), 2983 /*IsInitializer=*/true); 2984 CtorCGF.FinishFunction(); 2985 Ctor = Fn; 2986 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2987 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2988 } else { 2989 Ctor = new llvm::GlobalVariable( 2990 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2991 llvm::GlobalValue::PrivateLinkage, 2992 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2993 ID = Ctor; 2994 } 2995 2996 // Register the information for the entry associated with the constructor. 
2997 Out.clear(); 2998 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2999 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 3000 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 3001 } 3002 if (VD->getType().isDestructedType() != QualType::DK_none) { 3003 llvm::Constant *Dtor; 3004 llvm::Constant *ID; 3005 if (CGM.getLangOpts().OpenMPIsDevice) { 3006 // Generate function that emits destructor call for the threadprivate 3007 // copy of the variable VD 3008 CodeGenFunction DtorCGF(CGM); 3009 3010 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 3011 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 3012 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 3013 FTy, Twine(Buffer, "_dtor"), FI, Loc); 3014 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 3015 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 3016 FunctionArgList(), Loc, Loc); 3017 // Create a scope with an artificial location for the body of this 3018 // function. 3019 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 3020 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 3021 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 3022 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 3023 DtorCGF.FinishFunction(); 3024 Dtor = Fn; 3025 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 3026 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 3027 } else { 3028 Dtor = new llvm::GlobalVariable( 3029 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 3030 llvm::GlobalValue::PrivateLinkage, 3031 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 3032 ID = Dtor; 3033 } 3034 // Register the information for the entry associated with the destructor. 
3035 Out.clear(); 3036 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 3037 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 3038 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 3039 } 3040 return CGM.getLangOpts().OpenMPIsDevice; 3041 } 3042 3043 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 3044 QualType VarType, 3045 StringRef Name) { 3046 std::string Suffix = getName({"artificial", ""}); 3047 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 3048 llvm::Value *GAddr = 3049 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 3050 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && 3051 CGM.getTarget().isTLSSupported()) { 3052 cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); 3053 return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); 3054 } 3055 std::string CacheSuffix = getName({"cache", ""}); 3056 llvm::Value *Args[] = { 3057 emitUpdateLocation(CGF, SourceLocation()), 3058 getThreadID(CGF, SourceLocation()), 3059 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 3060 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 3061 /*isSigned=*/false), 3062 getOrCreateInternalVariable( 3063 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 3064 return Address( 3065 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3066 CGF.EmitRuntimeCall( 3067 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 3068 VarLVType->getPointerTo(/*AddrSpace=*/0)), 3069 CGM.getContext().getTypeAlignInChars(VarType)); 3070 } 3071 3072 void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, 3073 const RegionCodeGenTy &ThenGen, 3074 const RegionCodeGenTy &ElseGen) { 3075 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 3076 3077 // If the condition constant folds and can be elided, try to avoid emitting 3078 // the condition and the dead arm of the 
if/else. 3079 bool CondConstant; 3080 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 3081 if (CondConstant) 3082 ThenGen(CGF); 3083 else 3084 ElseGen(CGF); 3085 return; 3086 } 3087 3088 // Otherwise, the condition did not fold, or we couldn't elide it. Just 3089 // emit the conditional branch. 3090 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3091 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 3092 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 3093 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 3094 3095 // Emit the 'then' code. 3096 CGF.EmitBlock(ThenBlock); 3097 ThenGen(CGF); 3098 CGF.EmitBranch(ContBlock); 3099 // Emit the 'else' code if present. 3100 // There is no need to emit line number for unconditional branch. 3101 (void)ApplyDebugLocation::CreateEmpty(CGF); 3102 CGF.EmitBlock(ElseBlock); 3103 ElseGen(CGF); 3104 // There is no need to emit line number for unconditional branch. 3105 (void)ApplyDebugLocation::CreateEmpty(CGF); 3106 CGF.EmitBranch(ContBlock); 3107 // Emit the continuation block for code after the if. 
3108 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 3109 } 3110 3111 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 3112 llvm::Function *OutlinedFn, 3113 ArrayRef<llvm::Value *> CapturedVars, 3114 const Expr *IfCond) { 3115 if (!CGF.HaveInsertPoint()) 3116 return; 3117 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 3118 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 3119 PrePostActionTy &) { 3120 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 3121 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3122 llvm::Value *Args[] = { 3123 RTLoc, 3124 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3125 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3126 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3127 RealArgs.append(std::begin(Args), std::end(Args)); 3128 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3129 3130 llvm::FunctionCallee RTLFn = 3131 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3132 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3133 }; 3134 auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3135 PrePostActionTy &) { 3136 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3137 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3138 // Build calls: 3139 // __kmpc_serialized_parallel(&Loc, GTid); 3140 llvm::Value *Args[] = {RTLoc, ThreadID}; 3141 CGF.EmitRuntimeCall( 3142 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3143 3144 // OutlinedFn(>id, &zero_bound, CapturedStruct); 3145 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 3146 Address ZeroAddrBound = 3147 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3148 /*Name=*/".bound.zero.addr"); 3149 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 3150 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3151 // ThreadId for serialized parallels is 0. 
3152 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 3153 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 3154 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3155 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3156 3157 // __kmpc_end_serialized_parallel(&Loc, GTid); 3158 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3159 CGF.EmitRuntimeCall( 3160 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3161 EndArgs); 3162 }; 3163 if (IfCond) { 3164 emitIfClause(CGF, IfCond, ThenGen, ElseGen); 3165 } else { 3166 RegionCodeGenTy ThenRCG(ThenGen); 3167 ThenRCG(CGF); 3168 } 3169 } 3170 3171 // If we're inside an (outlined) parallel region, use the region info's 3172 // thread-ID variable (it is passed in a first argument of the outlined function 3173 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3174 // regular serial code region, get thread ID by calling kmp_int32 3175 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3176 // return the address of that temp. 
3177 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3178 SourceLocation Loc) { 3179 if (auto *OMPRegionInfo = 3180 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3181 if (OMPRegionInfo->getThreadIDVariable()) 3182 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); 3183 3184 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3185 QualType Int32Ty = 3186 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3187 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3188 CGF.EmitStoreOfScalar(ThreadID, 3189 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3190 3191 return ThreadIDTemp; 3192 } 3193 3194 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3195 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3196 SmallString<256> Buffer; 3197 llvm::raw_svector_ostream Out(Buffer); 3198 Out << Name; 3199 StringRef RuntimeName = Out.str(); 3200 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3201 if (Elem.second) { 3202 assert(Elem.second->getType()->getPointerElementType() == Ty && 3203 "OMP internal variable has different type than requested"); 3204 return &*Elem.second; 3205 } 3206 3207 return Elem.second = new llvm::GlobalVariable( 3208 CGM.getModule(), Ty, /*IsConstant*/ false, 3209 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3210 Elem.first(), /*InsertBefore=*/nullptr, 3211 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3212 } 3213 3214 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3215 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3216 std::string Name = getName({Prefix, "var"}); 3217 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3218 } 3219 3220 namespace { 3221 /// Common pre(post)-action for different OpenMP constructs. 
3222 class CommonActionTy final : public PrePostActionTy { 3223 llvm::FunctionCallee EnterCallee; 3224 ArrayRef<llvm::Value *> EnterArgs; 3225 llvm::FunctionCallee ExitCallee; 3226 ArrayRef<llvm::Value *> ExitArgs; 3227 bool Conditional; 3228 llvm::BasicBlock *ContBlock = nullptr; 3229 3230 public: 3231 CommonActionTy(llvm::FunctionCallee EnterCallee, 3232 ArrayRef<llvm::Value *> EnterArgs, 3233 llvm::FunctionCallee ExitCallee, 3234 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3235 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3236 ExitArgs(ExitArgs), Conditional(Conditional) {} 3237 void Enter(CodeGenFunction &CGF) override { 3238 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3239 if (Conditional) { 3240 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3241 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3242 ContBlock = CGF.createBasicBlock("omp_if.end"); 3243 // Generate the branch (If-stmt) 3244 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3245 CGF.EmitBlock(ThenBlock); 3246 } 3247 } 3248 void Done(CodeGenFunction &CGF) { 3249 // Emit the rest of blocks/branches 3250 CGF.EmitBranch(ContBlock); 3251 CGF.EmitBlock(ContBlock, true); 3252 } 3253 void Exit(CodeGenFunction &CGF) override { 3254 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3255 } 3256 }; 3257 } // anonymous namespace 3258 3259 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3260 StringRef CriticalName, 3261 const RegionCodeGenTy &CriticalOpGen, 3262 SourceLocation Loc, const Expr *Hint) { 3263 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3264 // CriticalOpGen(); 3265 // __kmpc_end_critical(ident_t *, gtid, Lock); 3266 // Prepare arguments and build a call to __kmpc_critical 3267 if (!CGF.HaveInsertPoint()) 3268 return; 3269 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3270 getCriticalRegionLock(CriticalName)}; 3271 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3272 std::end(Args)); 3273 if (Hint) { 3274 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3275 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3276 } 3277 CommonActionTy Action( 3278 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3279 : OMPRTL__kmpc_critical), 3280 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3281 CriticalOpGen.setAction(Action); 3282 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3283 } 3284 3285 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3286 const RegionCodeGenTy &MasterOpGen, 3287 SourceLocation Loc) { 3288 if (!CGF.HaveInsertPoint()) 3289 return; 3290 // if(__kmpc_master(ident_t *, gtid)) { 3291 // MasterOpGen(); 3292 // __kmpc_end_master(ident_t *, gtid); 3293 // } 3294 // Prepare arguments and build a call to __kmpc_master 3295 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3296 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3297 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3298 /*Conditional=*/true); 3299 MasterOpGen.setAction(Action); 3300 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3301 Action.Done(CGF); 3302 } 3303 3304 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3305 SourceLocation Loc) { 3306 if (!CGF.HaveInsertPoint()) 3307 return; 3308 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3309 llvm::Value *Args[] = { 3310 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3311 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3312 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 3313 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3314 Region->emitUntiedSwitch(CGF); 3315 } 3316 3317 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3318 const RegionCodeGenTy &TaskgroupOpGen, 3319 SourceLocation Loc) { 3320 if 
(!CGF.HaveInsertPoint()) 3321 return; 3322 // __kmpc_taskgroup(ident_t *, gtid); 3323 // TaskgroupOpGen(); 3324 // __kmpc_end_taskgroup(ident_t *, gtid); 3325 // Prepare arguments and build a call to __kmpc_taskgroup 3326 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3327 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3328 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3329 Args); 3330 TaskgroupOpGen.setAction(Action); 3331 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3332 } 3333 3334 /// Given an array of pointers to variables, project the address of a 3335 /// given variable. 3336 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3337 unsigned Index, const VarDecl *Var) { 3338 // Pull out the pointer to the variable. 3339 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3340 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3341 3342 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3343 Addr = CGF.Builder.CreateElementBitCast( 3344 Addr, CGF.ConvertTypeForMem(Var->getType())); 3345 return Addr; 3346 } 3347 3348 static llvm::Value *emitCopyprivateCopyFunction( 3349 CodeGenModule &CGM, llvm::Type *ArgsType, 3350 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3351 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3352 SourceLocation Loc) { 3353 ASTContext &C = CGM.getContext(); 3354 // void copy_func(void *LHSArg, void *RHSArg); 3355 FunctionArgList Args; 3356 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3357 ImplicitParamDecl::Other); 3358 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3359 ImplicitParamDecl::Other); 3360 Args.push_back(&LHSArg); 3361 Args.push_back(&RHSArg); 3362 const auto &CGFI = 3363 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3364 std::string Name = 3365 
CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3366 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3367 llvm::GlobalValue::InternalLinkage, Name, 3368 &CGM.getModule()); 3369 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3370 Fn->setDoesNotRecurse(); 3371 CodeGenFunction CGF(CGM); 3372 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3373 // Dest = (void*[n])(LHSArg); 3374 // Src = (void*[n])(RHSArg); 3375 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3376 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3377 ArgsType), CGF.getPointerAlign()); 3378 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3379 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3380 ArgsType), CGF.getPointerAlign()); 3381 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3382 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3383 // ... 3384 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3385 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3386 const auto *DestVar = 3387 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3388 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3389 3390 const auto *SrcVar = 3391 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3392 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3393 3394 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3395 QualType Type = VD->getType(); 3396 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3397 } 3398 CGF.FinishFunction(); 3399 return Fn; 3400 } 3401 3402 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3403 const RegionCodeGenTy &SingleOpGen, 3404 SourceLocation Loc, 3405 ArrayRef<const Expr *> CopyprivateVars, 3406 ArrayRef<const Expr *> SrcExprs, 3407 ArrayRef<const Expr *> DstExprs, 3408 ArrayRef<const Expr *> AssignmentOps) { 3409 if (!CGF.HaveInsertPoint()) 3410 return; 3411 assert(CopyprivateVars.size() == 
SrcExprs.size() && 3412 CopyprivateVars.size() == DstExprs.size() && 3413 CopyprivateVars.size() == AssignmentOps.size()); 3414 ASTContext &C = CGM.getContext(); 3415 // int32 did_it = 0; 3416 // if(__kmpc_single(ident_t *, gtid)) { 3417 // SingleOpGen(); 3418 // __kmpc_end_single(ident_t *, gtid); 3419 // did_it = 1; 3420 // } 3421 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3422 // <copy_func>, did_it); 3423 3424 Address DidIt = Address::invalid(); 3425 if (!CopyprivateVars.empty()) { 3426 // int32 did_it = 0; 3427 QualType KmpInt32Ty = 3428 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3429 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3430 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3431 } 3432 // Prepare arguments and build a call to __kmpc_single 3433 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3434 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3435 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3436 /*Conditional=*/true); 3437 SingleOpGen.setAction(Action); 3438 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3439 if (DidIt.isValid()) { 3440 // did_it = 1; 3441 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3442 } 3443 Action.Done(CGF); 3444 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3445 // <copy_func>, did_it); 3446 if (DidIt.isValid()) { 3447 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3448 QualType CopyprivateArrayTy = C.getConstantArrayType( 3449 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 3450 /*IndexTypeQuals=*/0); 3451 // Create a list of all private variables for copyprivate. 
3452 Address CopyprivateList = 3453 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3454 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3455 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3456 CGF.Builder.CreateStore( 3457 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3458 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), 3459 CGF.VoidPtrTy), 3460 Elem); 3461 } 3462 // Build function that copies private values from single region to all other 3463 // threads in the corresponding parallel region. 3464 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3465 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3466 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3467 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3468 Address CL = 3469 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3470 CGF.VoidPtrTy); 3471 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3472 llvm::Value *Args[] = { 3473 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3474 getThreadID(CGF, Loc), // i32 <gtid> 3475 BufSize, // size_t <buf_size> 3476 CL.getPointer(), // void *<copyprivate list> 3477 CpyFn, // void (*) (void *, void *) <copy_func> 3478 DidItVal // i32 did_it 3479 }; 3480 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3481 } 3482 } 3483 3484 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3485 const RegionCodeGenTy &OrderedOpGen, 3486 SourceLocation Loc, bool IsThreads) { 3487 if (!CGF.HaveInsertPoint()) 3488 return; 3489 // __kmpc_ordered(ident_t *, gtid); 3490 // OrderedOpGen(); 3491 // __kmpc_end_ordered(ident_t *, gtid); 3492 // Prepare arguments and build a call to __kmpc_ordered 3493 if (IsThreads) { 3494 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3495 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3496 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3497 Args); 
3498 OrderedOpGen.setAction(Action); 3499 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3500 return; 3501 } 3502 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3503 } 3504 3505 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3506 unsigned Flags; 3507 if (Kind == OMPD_for) 3508 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3509 else if (Kind == OMPD_sections) 3510 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3511 else if (Kind == OMPD_single) 3512 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3513 else if (Kind == OMPD_barrier) 3514 Flags = OMP_IDENT_BARRIER_EXPL; 3515 else 3516 Flags = OMP_IDENT_BARRIER_IMPL; 3517 return Flags; 3518 } 3519 3520 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3521 CodeGenFunction &CGF, const OMPLoopDirective &S, 3522 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3523 // Check if the loop directive is actually a doacross loop directive. In this 3524 // case choose static, 1 schedule. 3525 if (llvm::any_of( 3526 S.getClausesOfKind<OMPOrderedClause>(), 3527 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3528 ScheduleKind = OMPC_SCHEDULE_static; 3529 // Chunk size is 1 in this case. 
3530 llvm::APInt ChunkSize(32, 1); 3531 ChunkExpr = IntegerLiteral::Create( 3532 CGF.getContext(), ChunkSize, 3533 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3534 SourceLocation()); 3535 } 3536 } 3537 3538 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3539 OpenMPDirectiveKind Kind, bool EmitChecks, 3540 bool ForceSimpleCall) { 3541 // Check if we should use the OMPBuilder 3542 auto *OMPRegionInfo = 3543 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); 3544 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 3545 if (OMPBuilder) { 3546 CGF.Builder.restoreIP(OMPBuilder->CreateBarrier( 3547 CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); 3548 return; 3549 } 3550 3551 if (!CGF.HaveInsertPoint()) 3552 return; 3553 // Build call __kmpc_cancel_barrier(loc, thread_id); 3554 // Build call __kmpc_barrier(loc, thread_id); 3555 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3556 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3557 // thread_id); 3558 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3559 getThreadID(CGF, Loc)}; 3560 if (OMPRegionInfo) { 3561 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3562 llvm::Value *Result = CGF.EmitRuntimeCall( 3563 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3564 if (EmitChecks) { 3565 // if (__kmpc_cancel_barrier()) { 3566 // exit from construct; 3567 // } 3568 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3569 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3570 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3571 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3572 CGF.EmitBlock(ExitBB); 3573 // exit from construct; 3574 CodeGenFunction::JumpDest CancelDestination = 3575 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3576 CGF.EmitBranchThroughCleanup(CancelDestination); 3577 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3578 } 3579 
return; 3580 } 3581 } 3582 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3583 } 3584 3585 /// Map the OpenMP loop schedule to the runtime enumeration. 3586 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3587 bool Chunked, bool Ordered) { 3588 switch (ScheduleKind) { 3589 case OMPC_SCHEDULE_static: 3590 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3591 : (Ordered ? OMP_ord_static : OMP_sch_static); 3592 case OMPC_SCHEDULE_dynamic: 3593 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3594 case OMPC_SCHEDULE_guided: 3595 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3596 case OMPC_SCHEDULE_runtime: 3597 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3598 case OMPC_SCHEDULE_auto: 3599 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3600 case OMPC_SCHEDULE_unknown: 3601 assert(!Chunked && "chunk was specified but schedule kind not known"); 3602 return Ordered ? OMP_ord_static : OMP_sch_static; 3603 } 3604 llvm_unreachable("Unexpected runtime schedule"); 3605 } 3606 3607 /// Map the OpenMP distribute schedule to the runtime enumeration. 3608 static OpenMPSchedType 3609 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3610 // only static is allowed for dist_schedule 3611 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3612 } 3613 3614 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3615 bool Chunked) const { 3616 OpenMPSchedType Schedule = 3617 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3618 return Schedule == OMP_sch_static; 3619 } 3620 3621 bool CGOpenMPRuntime::isStaticNonchunked( 3622 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3623 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3624 return Schedule == OMP_dist_sch_static; 3625 } 3626 3627 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3628 bool Chunked) const { 3629 OpenMPSchedType Schedule = 3630 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3631 return Schedule == OMP_sch_static_chunked; 3632 } 3633 3634 bool CGOpenMPRuntime::isStaticChunked( 3635 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3636 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3637 return Schedule == OMP_dist_sch_static_chunked; 3638 } 3639 3640 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3641 OpenMPSchedType Schedule = 3642 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3643 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3644 return Schedule != OMP_sch_static; 3645 } 3646 3647 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 3648 OpenMPScheduleClauseModifier M1, 3649 OpenMPScheduleClauseModifier M2) { 3650 int Modifier = 0; 3651 switch (M1) { 3652 case OMPC_SCHEDULE_MODIFIER_monotonic: 3653 Modifier = OMP_sch_modifier_monotonic; 3654 break; 3655 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3656 Modifier = OMP_sch_modifier_nonmonotonic; 3657 break; 3658 case OMPC_SCHEDULE_MODIFIER_simd: 3659 if (Schedule == OMP_sch_static_chunked) 3660 Schedule = OMP_sch_static_balanced_chunked; 3661 break; 3662 case 
OMPC_SCHEDULE_MODIFIER_last: 3663 case OMPC_SCHEDULE_MODIFIER_unknown: 3664 break; 3665 } 3666 switch (M2) { 3667 case OMPC_SCHEDULE_MODIFIER_monotonic: 3668 Modifier = OMP_sch_modifier_monotonic; 3669 break; 3670 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3671 Modifier = OMP_sch_modifier_nonmonotonic; 3672 break; 3673 case OMPC_SCHEDULE_MODIFIER_simd: 3674 if (Schedule == OMP_sch_static_chunked) 3675 Schedule = OMP_sch_static_balanced_chunked; 3676 break; 3677 case OMPC_SCHEDULE_MODIFIER_last: 3678 case OMPC_SCHEDULE_MODIFIER_unknown: 3679 break; 3680 } 3681 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 3682 // If the static schedule kind is specified or if the ordered clause is 3683 // specified, and if the nonmonotonic modifier is not specified, the effect is 3684 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 3685 // modifier is specified, the effect is as if the nonmonotonic modifier is 3686 // specified. 3687 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 3688 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 3689 Schedule == OMP_sch_static_balanced_chunked || 3690 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 3691 Schedule == OMP_dist_sch_static_chunked || 3692 Schedule == OMP_dist_sch_static)) 3693 Modifier = OMP_sch_modifier_nonmonotonic; 3694 } 3695 return Schedule | Modifier; 3696 } 3697 3698 void CGOpenMPRuntime::emitForDispatchInit( 3699 CodeGenFunction &CGF, SourceLocation Loc, 3700 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3701 bool Ordered, const DispatchRTInput &DispatchValues) { 3702 if (!CGF.HaveInsertPoint()) 3703 return; 3704 OpenMPSchedType Schedule = getRuntimeSchedule( 3705 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3706 assert(Ordered || 3707 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3708 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3709 
Schedule != OMP_sch_static_balanced_chunked)); 3710 // Call __kmpc_dispatch_init( 3711 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3712 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3713 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3714 3715 // If the Chunk was not specified in the clause - use default value 1. 3716 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 3717 : CGF.Builder.getIntN(IVSize, 1); 3718 llvm::Value *Args[] = { 3719 emitUpdateLocation(CGF, Loc), 3720 getThreadID(CGF, Loc), 3721 CGF.Builder.getInt32(addMonoNonMonoModifier( 3722 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3723 DispatchValues.LB, // Lower 3724 DispatchValues.UB, // Upper 3725 CGF.Builder.getIntN(IVSize, 1), // Stride 3726 Chunk // Chunk 3727 }; 3728 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3729 } 3730 3731 static void emitForStaticInitCall( 3732 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3733 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3734 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3735 const CGOpenMPRuntime::StaticRTInput &Values) { 3736 if (!CGF.HaveInsertPoint()) 3737 return; 3738 3739 assert(!Values.Ordered); 3740 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3741 Schedule == OMP_sch_static_balanced_chunked || 3742 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3743 Schedule == OMP_dist_sch_static || 3744 Schedule == OMP_dist_sch_static_chunked); 3745 3746 // Call __kmpc_for_static_init( 3747 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3748 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3749 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3750 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3751 llvm::Value *Chunk = Values.Chunk; 3752 if (Chunk == nullptr) { 3753 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3754 Schedule == 
OMP_dist_sch_static) && 3755 "expected static non-chunked schedule"); 3756 // If the Chunk was not specified in the clause - use default value 1. 3757 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3758 } else { 3759 assert((Schedule == OMP_sch_static_chunked || 3760 Schedule == OMP_sch_static_balanced_chunked || 3761 Schedule == OMP_ord_static_chunked || 3762 Schedule == OMP_dist_sch_static_chunked) && 3763 "expected static chunked schedule"); 3764 } 3765 llvm::Value *Args[] = { 3766 UpdateLocation, 3767 ThreadId, 3768 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 3769 M2)), // Schedule type 3770 Values.IL.getPointer(), // &isLastIter 3771 Values.LB.getPointer(), // &LB 3772 Values.UB.getPointer(), // &UB 3773 Values.ST.getPointer(), // &Stride 3774 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3775 Chunk // Chunk 3776 }; 3777 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3778 } 3779 3780 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3781 SourceLocation Loc, 3782 OpenMPDirectiveKind DKind, 3783 const OpenMPScheduleTy &ScheduleKind, 3784 const StaticRTInput &Values) { 3785 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3786 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3787 assert(isOpenMPWorksharingDirective(DKind) && 3788 "Expected loop-based or sections-based directive."); 3789 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3790 isOpenMPLoopDirective(DKind) 3791 ? 
OMP_IDENT_WORK_LOOP 3792 : OMP_IDENT_WORK_SECTIONS); 3793 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3794 llvm::FunctionCallee StaticInitFunction = 3795 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3796 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3797 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3798 } 3799 3800 void CGOpenMPRuntime::emitDistributeStaticInit( 3801 CodeGenFunction &CGF, SourceLocation Loc, 3802 OpenMPDistScheduleClauseKind SchedKind, 3803 const CGOpenMPRuntime::StaticRTInput &Values) { 3804 OpenMPSchedType ScheduleNum = 3805 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3806 llvm::Value *UpdatedLocation = 3807 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3808 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3809 llvm::FunctionCallee StaticInitFunction = 3810 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3811 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3812 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3813 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3814 } 3815 3816 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3817 SourceLocation Loc, 3818 OpenMPDirectiveKind DKind) { 3819 if (!CGF.HaveInsertPoint()) 3820 return; 3821 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3822 llvm::Value *Args[] = { 3823 emitUpdateLocation(CGF, Loc, 3824 isOpenMPDistributeDirective(DKind) 3825 ? OMP_IDENT_WORK_DISTRIBUTE 3826 : isOpenMPLoopDirective(DKind) 3827 ? 
OMP_IDENT_WORK_LOOP 3828 : OMP_IDENT_WORK_SECTIONS), 3829 getThreadID(CGF, Loc)}; 3830 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3831 Args); 3832 } 3833 3834 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3835 SourceLocation Loc, 3836 unsigned IVSize, 3837 bool IVSigned) { 3838 if (!CGF.HaveInsertPoint()) 3839 return; 3840 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3841 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3842 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3843 } 3844 3845 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3846 SourceLocation Loc, unsigned IVSize, 3847 bool IVSigned, Address IL, 3848 Address LB, Address UB, 3849 Address ST) { 3850 // Call __kmpc_dispatch_next( 3851 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3852 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3853 // kmp_int[32|64] *p_stride); 3854 llvm::Value *Args[] = { 3855 emitUpdateLocation(CGF, Loc), 3856 getThreadID(CGF, Loc), 3857 IL.getPointer(), // &isLastIter 3858 LB.getPointer(), // &Lower 3859 UB.getPointer(), // &Upper 3860 ST.getPointer() // &Stride 3861 }; 3862 llvm::Value *Call = 3863 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3864 return CGF.EmitScalarConversion( 3865 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3866 CGF.getContext().BoolTy, Loc); 3867 } 3868 3869 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3870 llvm::Value *NumThreads, 3871 SourceLocation Loc) { 3872 if (!CGF.HaveInsertPoint()) 3873 return; 3874 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3875 llvm::Value *Args[] = { 3876 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3877 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3878 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3879 Args); 3880 
} 3881 3882 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3883 ProcBindKind ProcBind, 3884 SourceLocation Loc) { 3885 if (!CGF.HaveInsertPoint()) 3886 return; 3887 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 3888 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3889 llvm::Value *Args[] = { 3890 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3891 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 3892 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3893 } 3894 3895 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3896 SourceLocation Loc) { 3897 if (!CGF.HaveInsertPoint()) 3898 return; 3899 // Build call void __kmpc_flush(ident_t *loc) 3900 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3901 emitUpdateLocation(CGF, Loc)); 3902 } 3903 3904 namespace { 3905 /// Indexes of fields for type kmp_task_t. 3906 enum KmpTaskTFields { 3907 /// List of shared variables. 3908 KmpTaskTShareds, 3909 /// Task routine. 3910 KmpTaskTRoutine, 3911 /// Partition id for the untied tasks. 3912 KmpTaskTPartId, 3913 /// Function with call of destructors for private variables. 3914 Data1, 3915 /// Task priority. 3916 Data2, 3917 /// (Taskloops only) Lower bound. 3918 KmpTaskTLowerBound, 3919 /// (Taskloops only) Upper bound. 3920 KmpTaskTUpperBound, 3921 /// (Taskloops only) Stride. 3922 KmpTaskTStride, 3923 /// (Taskloops only) Is last iteration flag. 3924 KmpTaskTLastIter, 3925 /// (Taskloops only) Reduction data. 3926 KmpTaskTReductions, 3927 }; 3928 } // anonymous namespace 3929 3930 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3931 return OffloadEntriesTargetRegion.empty() && 3932 OffloadEntriesDeviceGlobalVar.empty(); 3933 } 3934 3935 /// Initialize target region entry. 
3936 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3937 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3938 StringRef ParentName, unsigned LineNum, 3939 unsigned Order) { 3940 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3941 "only required for the device " 3942 "code generation."); 3943 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3944 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3945 OMPTargetRegionEntryTargetRegion); 3946 ++OffloadingEntriesNum; 3947 } 3948 3949 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3950 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3951 StringRef ParentName, unsigned LineNum, 3952 llvm::Constant *Addr, llvm::Constant *ID, 3953 OMPTargetRegionEntryKind Flags) { 3954 // If we are emitting code for a target, the entry is already initialized, 3955 // only has to be registered. 3956 if (CGM.getLangOpts().OpenMPIsDevice) { 3957 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3958 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3959 DiagnosticsEngine::Error, 3960 "Unable to find target region on line '%0' in the device code."); 3961 CGM.getDiags().Report(DiagID) << LineNum; 3962 return; 3963 } 3964 auto &Entry = 3965 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3966 assert(Entry.isValid() && "Entry not initialized!"); 3967 Entry.setAddress(Addr); 3968 Entry.setID(ID); 3969 Entry.setFlags(Flags); 3970 } else { 3971 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3972 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3973 ++OffloadingEntriesNum; 3974 } 3975 } 3976 3977 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3978 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3979 unsigned LineNum) const { 3980 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3981 if (PerDevice 
== OffloadEntriesTargetRegion.end()) 3982 return false; 3983 auto PerFile = PerDevice->second.find(FileID); 3984 if (PerFile == PerDevice->second.end()) 3985 return false; 3986 auto PerParentName = PerFile->second.find(ParentName); 3987 if (PerParentName == PerFile->second.end()) 3988 return false; 3989 auto PerLine = PerParentName->second.find(LineNum); 3990 if (PerLine == PerParentName->second.end()) 3991 return false; 3992 // Fail if this entry is already registered. 3993 if (PerLine->second.getAddress() || PerLine->second.getID()) 3994 return false; 3995 return true; 3996 } 3997 3998 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3999 const OffloadTargetRegionEntryInfoActTy &Action) { 4000 // Scan all target region entries and perform the provided action. 4001 for (const auto &D : OffloadEntriesTargetRegion) 4002 for (const auto &F : D.second) 4003 for (const auto &P : F.second) 4004 for (const auto &L : P.second) 4005 Action(D.first, F.first, P.first(), L.first, L.second); 4006 } 4007 4008 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4009 initializeDeviceGlobalVarEntryInfo(StringRef Name, 4010 OMPTargetGlobalVarEntryKind Flags, 4011 unsigned Order) { 4012 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 4013 "only required for the device " 4014 "code generation."); 4015 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 4016 ++OffloadingEntriesNum; 4017 } 4018 4019 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4020 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 4021 CharUnits VarSize, 4022 OMPTargetGlobalVarEntryKind Flags, 4023 llvm::GlobalValue::LinkageTypes Linkage) { 4024 if (CGM.getLangOpts().OpenMPIsDevice) { 4025 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 4026 assert(Entry.isValid() && Entry.getFlags() == Flags && 4027 "Entry not initialized!"); 4028 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 4029 "Resetting with 
the new address."); 4030 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 4031 if (Entry.getVarSize().isZero()) { 4032 Entry.setVarSize(VarSize); 4033 Entry.setLinkage(Linkage); 4034 } 4035 return; 4036 } 4037 Entry.setVarSize(VarSize); 4038 Entry.setLinkage(Linkage); 4039 Entry.setAddress(Addr); 4040 } else { 4041 if (hasDeviceGlobalVarEntryInfo(VarName)) { 4042 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 4043 assert(Entry.isValid() && Entry.getFlags() == Flags && 4044 "Entry not initialized!"); 4045 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 4046 "Resetting with the new address."); 4047 if (Entry.getVarSize().isZero()) { 4048 Entry.setVarSize(VarSize); 4049 Entry.setLinkage(Linkage); 4050 } 4051 return; 4052 } 4053 OffloadEntriesDeviceGlobalVar.try_emplace( 4054 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 4055 ++OffloadingEntriesNum; 4056 } 4057 } 4058 4059 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4060 actOnDeviceGlobalVarEntriesInfo( 4061 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 4062 // Scan all target region entries and perform the provided action. 4063 for (const auto &E : OffloadEntriesDeviceGlobalVar) 4064 Action(E.getKey(), E.getValue()); 4065 } 4066 4067 void CGOpenMPRuntime::createOffloadEntry( 4068 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4069 llvm::GlobalValue::LinkageTypes Linkage) { 4070 StringRef Name = Addr->getName(); 4071 llvm::Module &M = CGM.getModule(); 4072 llvm::LLVMContext &C = M.getContext(); 4073 4074 // Create constant string with the name. 
4075 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4076 4077 std::string StringName = getName({"omp_offloading", "entry_name"}); 4078 auto *Str = new llvm::GlobalVariable( 4079 M, StrPtrInit->getType(), /*isConstant=*/true, 4080 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4081 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4082 4083 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4084 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4085 llvm::ConstantInt::get(CGM.SizeTy, Size), 4086 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4087 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4088 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4089 llvm::GlobalVariable *Entry = createGlobalStruct( 4090 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4091 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4092 4093 // The entry has to be created in the section the linker expects it to be. 4094 Entry->setSection("omp_offloading_entries"); 4095 } 4096 4097 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4098 // Emit the offloading entries and metadata so that the device codegen side 4099 // can easily figure out what to emit. The produced metadata looks like 4100 // this: 4101 // 4102 // !omp_offload.info = !{!1, ...} 4103 // 4104 // Right now we only generate metadata for function that contain target 4105 // regions. 4106 4107 // If we are in simd mode or there are no entries, we don't need to do 4108 // anything. 
4109 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 4110 return; 4111 4112 llvm::Module &M = CGM.getModule(); 4113 llvm::LLVMContext &C = M.getContext(); 4114 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 4115 SourceLocation, StringRef>, 4116 16> 4117 OrderedEntries(OffloadEntriesInfoManager.size()); 4118 llvm::SmallVector<StringRef, 16> ParentFunctions( 4119 OffloadEntriesInfoManager.size()); 4120 4121 // Auxiliary methods to create metadata values and strings. 4122 auto &&GetMDInt = [this](unsigned V) { 4123 return llvm::ConstantAsMetadata::get( 4124 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4125 }; 4126 4127 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4128 4129 // Create the offloading info metadata node. 4130 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4131 4132 // Create function that emits metadata for each target region entry; 4133 auto &&TargetRegionMetadataEmitter = 4134 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 4135 &GetMDString]( 4136 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4137 unsigned Line, 4138 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4139 // Generate metadata for target regions. Each entry of this metadata 4140 // contains: 4141 // - Entry 0 -> Kind of this type of metadata (0). 4142 // - Entry 1 -> Device ID of the file where the entry was identified. 4143 // - Entry 2 -> File ID of the file where the entry was identified. 4144 // - Entry 3 -> Mangled name of the function where the entry was 4145 // identified. 4146 // - Entry 4 -> Line in the file where the entry was identified. 4147 // - Entry 5 -> Order the entry was created. 4148 // The first element of the metadata node is the kind. 
4149 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4150 GetMDInt(FileID), GetMDString(ParentName), 4151 GetMDInt(Line), GetMDInt(E.getOrder())}; 4152 4153 SourceLocation Loc; 4154 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 4155 E = CGM.getContext().getSourceManager().fileinfo_end(); 4156 I != E; ++I) { 4157 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 4158 I->getFirst()->getUniqueID().getFile() == FileID) { 4159 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 4160 I->getFirst(), Line, 1); 4161 break; 4162 } 4163 } 4164 // Save this entry in the right position of the ordered entries array. 4165 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 4166 ParentFunctions[E.getOrder()] = ParentName; 4167 4168 // Add metadata to the named metadata node. 4169 MD->addOperand(llvm::MDNode::get(C, Ops)); 4170 }; 4171 4172 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4173 TargetRegionMetadataEmitter); 4174 4175 // Create function that emits metadata for each device global variable entry; 4176 auto &&DeviceGlobalVarMetadataEmitter = 4177 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4178 MD](StringRef MangledName, 4179 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4180 &E) { 4181 // Generate metadata for global variables. Each entry of this metadata 4182 // contains: 4183 // - Entry 0 -> Kind of this type of metadata (1). 4184 // - Entry 1 -> Mangled name of the variable. 4185 // - Entry 2 -> Declare target kind. 4186 // - Entry 3 -> Order the entry was created. 4187 // The first element of the metadata node is the kind. 4188 llvm::Metadata *Ops[] = { 4189 GetMDInt(E.getKind()), GetMDString(MangledName), 4190 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4191 4192 // Save this entry in the right position of the ordered entries array. 
4193 OrderedEntries[E.getOrder()] = 4194 std::make_tuple(&E, SourceLocation(), MangledName); 4195 4196 // Add metadata to the named metadata node. 4197 MD->addOperand(llvm::MDNode::get(C, Ops)); 4198 }; 4199 4200 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4201 DeviceGlobalVarMetadataEmitter); 4202 4203 for (const auto &E : OrderedEntries) { 4204 assert(std::get<0>(E) && "All ordered entries must exist!"); 4205 if (const auto *CE = 4206 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4207 std::get<0>(E))) { 4208 if (!CE->getID() || !CE->getAddress()) { 4209 // Do not blame the entry if the parent funtion is not emitted. 4210 StringRef FnName = ParentFunctions[CE->getOrder()]; 4211 if (!CGM.GetGlobalValue(FnName)) 4212 continue; 4213 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4214 DiagnosticsEngine::Error, 4215 "Offloading entry for target region in %0 is incorrect: either the " 4216 "address or the ID is invalid."); 4217 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 4218 continue; 4219 } 4220 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4221 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4222 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 4223 OffloadEntryInfoDeviceGlobalVar>( 4224 std::get<0>(E))) { 4225 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4226 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4227 CE->getFlags()); 4228 switch (Flags) { 4229 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4230 if (CGM.getLangOpts().OpenMPIsDevice && 4231 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4232 continue; 4233 if (!CE->getAddress()) { 4234 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4235 DiagnosticsEngine::Error, "Offloading entry for declare target " 4236 "variable %0 is incorrect: the " 4237 "address is invalid."); 4238 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 4239 
continue; 4240 } 4241 // The vaiable has no definition - no need to add the entry. 4242 if (CE->getVarSize().isZero()) 4243 continue; 4244 break; 4245 } 4246 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4247 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4248 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4249 "Declaret target link address is set."); 4250 if (CGM.getLangOpts().OpenMPIsDevice) 4251 continue; 4252 if (!CE->getAddress()) { 4253 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4254 DiagnosticsEngine::Error, 4255 "Offloading entry for declare target variable is incorrect: the " 4256 "address is invalid."); 4257 CGM.getDiags().Report(DiagID); 4258 continue; 4259 } 4260 break; 4261 } 4262 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4263 CE->getVarSize().getQuantity(), Flags, 4264 CE->getLinkage()); 4265 } else { 4266 llvm_unreachable("Unsupported entry kind."); 4267 } 4268 } 4269 } 4270 4271 /// Loads all the offload entries information from the host IR 4272 /// metadata. 4273 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4274 // If we are in target mode, load the metadata from the host IR. This code has 4275 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4276 4277 if (!CGM.getLangOpts().OpenMPIsDevice) 4278 return; 4279 4280 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4281 return; 4282 4283 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4284 if (auto EC = Buf.getError()) { 4285 CGM.getDiags().Report(diag::err_cannot_open_file) 4286 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4287 return; 4288 } 4289 4290 llvm::LLVMContext C; 4291 auto ME = expectedToErrorOrAndEmitErrors( 4292 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4293 4294 if (auto EC = ME.getError()) { 4295 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4296 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4297 CGM.getDiags().Report(DiagID) 4298 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4299 return; 4300 } 4301 4302 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4303 if (!MD) 4304 return; 4305 4306 for (llvm::MDNode *MN : MD->operands()) { 4307 auto &&GetMDInt = [MN](unsigned Idx) { 4308 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4309 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4310 }; 4311 4312 auto &&GetMDString = [MN](unsigned Idx) { 4313 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4314 return V->getString(); 4315 }; 4316 4317 switch (GetMDInt(0)) { 4318 default: 4319 llvm_unreachable("Unexpected metadata!"); 4320 break; 4321 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4322 OffloadingEntryInfoTargetRegion: 4323 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4324 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4325 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4326 /*Order=*/GetMDInt(5)); 4327 break; 4328 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4329 OffloadingEntryInfoDeviceGlobalVar: 4330 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4331 /*MangledName=*/GetMDString(1), 4332 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4333 /*Flags=*/GetMDInt(2)), 4334 /*Order=*/GetMDInt(3)); 4335 break; 4336 } 4337 } 4338 } 4339 4340 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4341 if (!KmpRoutineEntryPtrTy) { 4342 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4343 ASTContext &C = CGM.getContext(); 4344 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4345 FunctionProtoType::ExtProtoInfo EPI; 4346 KmpRoutineEntryPtrQTy = C.getPointerType( 4347 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4348 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4349 } 4350 } 4351 4352 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4353 // Make sure the type of the entry is already created. This is the type we 4354 // have to create: 4355 // struct __tgt_offload_entry{ 4356 // void *addr; // Pointer to the offload entry info. 4357 // // (function or global) 4358 // char *name; // Name of the function or global. 4359 // size_t size; // Size of the entry info (0 if it a function). 4360 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4361 // int32_t reserved; // Reserved, to use by the runtime library. 
4362 // }; 4363 if (TgtOffloadEntryQTy.isNull()) { 4364 ASTContext &C = CGM.getContext(); 4365 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4366 RD->startDefinition(); 4367 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4368 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4369 addFieldToRecordDecl(C, RD, C.getSizeType()); 4370 addFieldToRecordDecl( 4371 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4372 addFieldToRecordDecl( 4373 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4374 RD->completeDefinition(); 4375 RD->addAttr(PackedAttr::CreateImplicit(C)); 4376 TgtOffloadEntryQTy = C.getRecordType(RD); 4377 } 4378 return TgtOffloadEntryQTy; 4379 } 4380 4381 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4382 // These are the types we need to build: 4383 // struct __tgt_device_image{ 4384 // void *ImageStart; // Pointer to the target code start. 4385 // void *ImageEnd; // Pointer to the target code end. 4386 // // We also add the host entries to the device image, as it may be useful 4387 // // for the target runtime to have access to that information. 4388 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4389 // // the entries. 4390 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4391 // // entries (non inclusive). 
4392 // }; 4393 if (TgtDeviceImageQTy.isNull()) { 4394 ASTContext &C = CGM.getContext(); 4395 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4396 RD->startDefinition(); 4397 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4398 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4399 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4400 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4401 RD->completeDefinition(); 4402 TgtDeviceImageQTy = C.getRecordType(RD); 4403 } 4404 return TgtDeviceImageQTy; 4405 } 4406 4407 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4408 // struct __tgt_bin_desc{ 4409 // int32_t NumDevices; // Number of devices supported. 4410 // __tgt_device_image *DeviceImages; // Arrays of device images 4411 // // (one per device). 4412 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4413 // // entries. 4414 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4415 // // entries (non inclusive). 
4416 // }; 4417 if (TgtBinaryDescriptorQTy.isNull()) { 4418 ASTContext &C = CGM.getContext(); 4419 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4420 RD->startDefinition(); 4421 addFieldToRecordDecl( 4422 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4423 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4424 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4425 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4426 RD->completeDefinition(); 4427 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4428 } 4429 return TgtBinaryDescriptorQTy; 4430 } 4431 4432 namespace { 4433 struct PrivateHelpersTy { 4434 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4435 const VarDecl *PrivateElemInit) 4436 : Original(Original), PrivateCopy(PrivateCopy), 4437 PrivateElemInit(PrivateElemInit) {} 4438 const VarDecl *Original; 4439 const VarDecl *PrivateCopy; 4440 const VarDecl *PrivateElemInit; 4441 }; 4442 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4443 } // anonymous namespace 4444 4445 static RecordDecl * 4446 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4447 if (!Privates.empty()) { 4448 ASTContext &C = CGM.getContext(); 4449 // Build struct .kmp_privates_t. 
{ 4450 // /* private vars */ 4451 // }; 4452 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4453 RD->startDefinition(); 4454 for (const auto &Pair : Privates) { 4455 const VarDecl *VD = Pair.second.Original; 4456 QualType Type = VD->getType().getNonReferenceType(); 4457 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4458 if (VD->hasAttrs()) { 4459 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4460 E(VD->getAttrs().end()); 4461 I != E; ++I) 4462 FD->addAttr(*I); 4463 } 4464 } 4465 RD->completeDefinition(); 4466 return RD; 4467 } 4468 return nullptr; 4469 } 4470 4471 static RecordDecl * 4472 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4473 QualType KmpInt32Ty, 4474 QualType KmpRoutineEntryPointerQTy) { 4475 ASTContext &C = CGM.getContext(); 4476 // Build struct kmp_task_t { 4477 // void * shareds; 4478 // kmp_routine_entry_t routine; 4479 // kmp_int32 part_id; 4480 // kmp_cmplrdata_t data1; 4481 // kmp_cmplrdata_t data2; 4482 // For taskloops additional fields: 4483 // kmp_uint64 lb; 4484 // kmp_uint64 ub; 4485 // kmp_int64 st; 4486 // kmp_int32 liter; 4487 // void * reductions; 4488 // }; 4489 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4490 UD->startDefinition(); 4491 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4492 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4493 UD->completeDefinition(); 4494 QualType KmpCmplrdataTy = C.getRecordType(UD); 4495 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4496 RD->startDefinition(); 4497 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4498 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4499 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4500 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4501 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4502 if (isOpenMPTaskLoopDirective(Kind)) { 4503 QualType KmpUInt64Ty = 4504 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4505 QualType KmpInt64Ty = 4506 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4507 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4508 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4509 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4510 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4511 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4512 } 4513 RD->completeDefinition(); 4514 return RD; 4515 } 4516 4517 static RecordDecl * 4518 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4519 ArrayRef<PrivateDataTy> Privates) { 4520 ASTContext &C = CGM.getContext(); 4521 // Build struct kmp_task_t_with_privates { 4522 // kmp_task_t task_data; 4523 // .kmp_privates_t. privates; 4524 // }; 4525 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4526 RD->startDefinition(); 4527 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4528 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4529 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4530 RD->completeDefinition(); 4531 return RD; 4532 } 4533 4534 /// Emit a proxy function which accepts kmp_task_t as the second 4535 /// argument. 
4536 /// \code 4537 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4538 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4539 /// For taskloops: 4540 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4541 /// tt->reductions, tt->shareds); 4542 /// return 0; 4543 /// } 4544 /// \endcode 4545 static llvm::Function * 4546 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4547 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4548 QualType KmpTaskTWithPrivatesPtrQTy, 4549 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4550 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4551 llvm::Value *TaskPrivatesMap) { 4552 ASTContext &C = CGM.getContext(); 4553 FunctionArgList Args; 4554 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4555 ImplicitParamDecl::Other); 4556 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4557 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4558 ImplicitParamDecl::Other); 4559 Args.push_back(&GtidArg); 4560 Args.push_back(&TaskTypeArg); 4561 const auto &TaskEntryFnInfo = 4562 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4563 llvm::FunctionType *TaskEntryTy = 4564 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4565 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4566 auto *TaskEntry = llvm::Function::Create( 4567 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4568 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4569 TaskEntry->setDoesNotRecurse(); 4570 CodeGenFunction CGF(CGM); 4571 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4572 Loc, Loc); 4573 4574 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4575 // tt, 4576 // For taskloops: 4577 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4578 // 
tt->task_data.shareds); 4579 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4580 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4581 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4582 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4583 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4584 const auto *KmpTaskTWithPrivatesQTyRD = 4585 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4586 LValue Base = 4587 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4588 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4589 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4590 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4591 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); 4592 4593 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4594 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4595 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4596 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4597 CGF.ConvertTypeForMem(SharedsPtrTy)); 4598 4599 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4600 llvm::Value *PrivatesParam; 4601 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4602 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4603 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4604 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); 4605 } else { 4606 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4607 } 4608 4609 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4610 TaskPrivatesMap, 4611 CGF.Builder 4612 .CreatePointerBitCastOrAddrSpaceCast( 4613 TDBase.getAddress(CGF), CGF.VoidPtrTy) 4614 .getPointer()}; 4615 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4616 std::end(CommonArgs)); 4617 if (isOpenMPTaskLoopDirective(Kind)) { 4618 auto LBFI = 
std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); 4619 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4620 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4621 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4622 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4623 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4624 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4625 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4626 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4627 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4628 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4629 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4630 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4631 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4632 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4633 CallArgs.push_back(LBParam); 4634 CallArgs.push_back(UBParam); 4635 CallArgs.push_back(StParam); 4636 CallArgs.push_back(LIParam); 4637 CallArgs.push_back(RParam); 4638 } 4639 CallArgs.push_back(SharedsParam); 4640 4641 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4642 CallArgs); 4643 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4644 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4645 CGF.FinishFunction(); 4646 return TaskEntry; 4647 } 4648 4649 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4650 SourceLocation Loc, 4651 QualType KmpInt32Ty, 4652 QualType KmpTaskTWithPrivatesPtrQTy, 4653 QualType KmpTaskTWithPrivatesQTy) { 4654 ASTContext &C = CGM.getContext(); 4655 FunctionArgList Args; 4656 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4657 ImplicitParamDecl::Other); 4658 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4659 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4660 
ImplicitParamDecl::Other); 4661 Args.push_back(&GtidArg); 4662 Args.push_back(&TaskTypeArg); 4663 const auto &DestructorFnInfo = 4664 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4665 llvm::FunctionType *DestructorFnTy = 4666 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4667 std::string Name = 4668 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4669 auto *DestructorFn = 4670 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4671 Name, &CGM.getModule()); 4672 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4673 DestructorFnInfo); 4674 DestructorFn->setDoesNotRecurse(); 4675 CodeGenFunction CGF(CGM); 4676 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4677 Args, Loc, Loc); 4678 4679 LValue Base = CGF.EmitLoadOfPointerLValue( 4680 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4681 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4682 const auto *KmpTaskTWithPrivatesQTyRD = 4683 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4684 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4685 Base = CGF.EmitLValueForField(Base, *FI); 4686 for (const auto *Field : 4687 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4688 if (QualType::DestructionKind DtorKind = 4689 Field->getType().isDestructedType()) { 4690 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4691 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); 4692 } 4693 } 4694 CGF.FinishFunction(); 4695 return DestructorFn; 4696 } 4697 4698 /// Emit a privates mapping function for correct handling of private and 4699 /// firstprivate variables. 4700 /// \code 4701 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4702 /// **noalias priv1,..., <tyn> **noalias privn) { 4703 /// *priv1 = &.privates.priv1; 4704 /// ...; 4705 /// *privn = &.privates.privn; 4706 /// } 4707 /// \endcode 4708 static llvm::Value * 4709 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4710 ArrayRef<const Expr *> PrivateVars, 4711 ArrayRef<const Expr *> FirstprivateVars, 4712 ArrayRef<const Expr *> LastprivateVars, 4713 QualType PrivatesQTy, 4714 ArrayRef<PrivateDataTy> Privates) { 4715 ASTContext &C = CGM.getContext(); 4716 FunctionArgList Args; 4717 ImplicitParamDecl TaskPrivatesArg( 4718 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4719 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4720 ImplicitParamDecl::Other); 4721 Args.push_back(&TaskPrivatesArg); 4722 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4723 unsigned Counter = 1; 4724 for (const Expr *E : PrivateVars) { 4725 Args.push_back(ImplicitParamDecl::Create( 4726 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4727 C.getPointerType(C.getPointerType(E->getType())) 4728 .withConst() 4729 .withRestrict(), 4730 ImplicitParamDecl::Other)); 4731 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4732 PrivateVarsPos[VD] = Counter; 4733 ++Counter; 4734 } 4735 for (const Expr *E : FirstprivateVars) { 4736 Args.push_back(ImplicitParamDecl::Create( 4737 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4738 C.getPointerType(C.getPointerType(E->getType())) 4739 .withConst() 4740 .withRestrict(), 4741 ImplicitParamDecl::Other)); 4742 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4743 PrivateVarsPos[VD] = Counter; 4744 ++Counter; 4745 } 4746 for (const Expr *E : LastprivateVars) { 4747 Args.push_back(ImplicitParamDecl::Create( 4748 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4749 C.getPointerType(C.getPointerType(E->getType())) 4750 .withConst() 4751 .withRestrict(), 4752 ImplicitParamDecl::Other)); 4753 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4754 
PrivateVarsPos[VD] = Counter; 4755 ++Counter; 4756 } 4757 const auto &TaskPrivatesMapFnInfo = 4758 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4759 llvm::FunctionType *TaskPrivatesMapTy = 4760 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4761 std::string Name = 4762 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4763 auto *TaskPrivatesMap = llvm::Function::Create( 4764 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4765 &CGM.getModule()); 4766 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4767 TaskPrivatesMapFnInfo); 4768 if (CGM.getLangOpts().Optimize) { 4769 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4770 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4771 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4772 } 4773 CodeGenFunction CGF(CGM); 4774 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4775 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4776 4777 // *privi = &.privates.privi; 4778 LValue Base = CGF.EmitLoadOfPointerLValue( 4779 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4780 TaskPrivatesArg.getType()->castAs<PointerType>()); 4781 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4782 Counter = 0; 4783 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4784 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4785 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4786 LValue RefLVal = 4787 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4788 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4789 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); 4790 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); 4791 ++Counter; 4792 } 4793 CGF.FinishFunction(); 4794 return TaskPrivatesMap; 4795 } 4796 4797 /// Emit initialization for private variables in task-based directives. 
4798 static void emitPrivatesInit(CodeGenFunction &CGF, 4799 const OMPExecutableDirective &D, 4800 Address KmpTaskSharedsPtr, LValue TDBase, 4801 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4802 QualType SharedsTy, QualType SharedsPtrTy, 4803 const OMPTaskDataTy &Data, 4804 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4805 ASTContext &C = CGF.getContext(); 4806 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4807 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4808 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4809 ? OMPD_taskloop 4810 : OMPD_task; 4811 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4812 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4813 LValue SrcBase; 4814 bool IsTargetTask = 4815 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4816 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4817 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4818 // PointersArray and SizesArray. The original variables for these arrays are 4819 // not captured and we get their addresses explicitly. 
4820 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4821 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4822 SrcBase = CGF.MakeAddrLValue( 4823 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4824 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4825 SharedsTy); 4826 } 4827 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4828 for (const PrivateDataTy &Pair : Privates) { 4829 const VarDecl *VD = Pair.second.PrivateCopy; 4830 const Expr *Init = VD->getAnyInitializer(); 4831 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4832 !CGF.isTrivialInitializer(Init)))) { 4833 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4834 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4835 const VarDecl *OriginalVD = Pair.second.Original; 4836 // Check if the variable is the target-based BasePointersArray, 4837 // PointersArray or SizesArray. 4838 LValue SharedRefLValue; 4839 QualType Type = PrivateLValue.getType(); 4840 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4841 if (IsTargetTask && !SharedField) { 4842 assert(isa<ImplicitParamDecl>(OriginalVD) && 4843 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4844 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4845 ->getNumParams() == 0 && 4846 isa<TranslationUnitDecl>( 4847 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4848 ->getDeclContext()) && 4849 "Expected artificial target data variable."); 4850 SharedRefLValue = 4851 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4852 } else { 4853 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4854 SharedRefLValue = CGF.MakeAddrLValue( 4855 Address(SharedRefLValue.getPointer(CGF), 4856 C.getDeclAlign(OriginalVD)), 4857 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4858 SharedRefLValue.getTBAAInfo()); 4859 } 4860 if (Type->isArrayType()) { 4861 // Initialize firstprivate array. 
4862 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4863 // Perform simple memcpy. 4864 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4865 } else { 4866 // Initialize firstprivate array using element-by-element 4867 // initialization. 4868 CGF.EmitOMPAggregateAssign( 4869 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), 4870 Type, 4871 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4872 Address SrcElement) { 4873 // Clean up any temporaries needed by the initialization. 4874 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4875 InitScope.addPrivate( 4876 Elem, [SrcElement]() -> Address { return SrcElement; }); 4877 (void)InitScope.Privatize(); 4878 // Emit initialization for single element. 4879 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4880 CGF, &CapturesInfo); 4881 CGF.EmitAnyExprToMem(Init, DestElement, 4882 Init->getType().getQualifiers(), 4883 /*IsInitializer=*/false); 4884 }); 4885 } 4886 } else { 4887 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4888 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { 4889 return SharedRefLValue.getAddress(CGF); 4890 }); 4891 (void)InitScope.Privatize(); 4892 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4893 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4894 /*capturedByInit=*/false); 4895 } 4896 } else { 4897 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4898 } 4899 } 4900 ++FI; 4901 } 4902 } 4903 4904 /// Check if duplication function is required for taskloops. 
4905 static bool checkInitIsRequired(CodeGenFunction &CGF, 4906 ArrayRef<PrivateDataTy> Privates) { 4907 bool InitRequired = false; 4908 for (const PrivateDataTy &Pair : Privates) { 4909 const VarDecl *VD = Pair.second.PrivateCopy; 4910 const Expr *Init = VD->getAnyInitializer(); 4911 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4912 !CGF.isTrivialInitializer(Init)); 4913 if (InitRequired) 4914 break; 4915 } 4916 return InitRequired; 4917 } 4918 4919 4920 /// Emit task_dup function (for initialization of 4921 /// private/firstprivate/lastprivate vars and last_iter flag) 4922 /// \code 4923 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4924 /// lastpriv) { 4925 /// // setup lastprivate flag 4926 /// task_dst->last = lastpriv; 4927 /// // could be constructor calls here... 4928 /// } 4929 /// \endcode 4930 static llvm::Value * 4931 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4932 const OMPExecutableDirective &D, 4933 QualType KmpTaskTWithPrivatesPtrQTy, 4934 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4935 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4936 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4937 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4938 ASTContext &C = CGM.getContext(); 4939 FunctionArgList Args; 4940 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4941 KmpTaskTWithPrivatesPtrQTy, 4942 ImplicitParamDecl::Other); 4943 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4944 KmpTaskTWithPrivatesPtrQTy, 4945 ImplicitParamDecl::Other); 4946 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4947 ImplicitParamDecl::Other); 4948 Args.push_back(&DstArg); 4949 Args.push_back(&SrcArg); 4950 Args.push_back(&LastprivArg); 4951 const auto &TaskDupFnInfo = 4952 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4953 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4954 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4955 auto *TaskDup = llvm::Function::Create( 4956 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4957 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4958 TaskDup->setDoesNotRecurse(); 4959 CodeGenFunction CGF(CGM); 4960 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4961 Loc); 4962 4963 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4964 CGF.GetAddrOfLocalVar(&DstArg), 4965 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4966 // task_dst->liter = lastpriv; 4967 if (WithLastIter) { 4968 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4969 LValue Base = CGF.EmitLValueForField( 4970 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4971 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4972 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4973 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4974 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4975 } 4976 4977 // Emit initial values for private copies (if any). 4978 assert(!Privates.empty()); 4979 Address KmpTaskSharedsPtr = Address::invalid(); 4980 if (!Data.FirstprivateVars.empty()) { 4981 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4982 CGF.GetAddrOfLocalVar(&SrcArg), 4983 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4984 LValue Base = CGF.EmitLValueForField( 4985 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4986 KmpTaskSharedsPtr = Address( 4987 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4988 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4989 KmpTaskTShareds)), 4990 Loc), 4991 CGF.getNaturalTypeAlignment(SharedsTy)); 4992 } 4993 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4994 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4995 CGF.FinishFunction(); 4996 return TaskDup; 4997 } 4998 4999 /// Checks if destructor function is required to be generated. 
5000 /// \return true if cleanups are required, false otherwise. 5001 static bool 5002 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 5003 bool NeedsCleanup = false; 5004 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 5005 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 5006 for (const FieldDecl *FD : PrivateRD->fields()) { 5007 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 5008 if (NeedsCleanup) 5009 break; 5010 } 5011 return NeedsCleanup; 5012 } 5013 5014 CGOpenMPRuntime::TaskResultTy 5015 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 5016 const OMPExecutableDirective &D, 5017 llvm::Function *TaskFunction, QualType SharedsTy, 5018 Address Shareds, const OMPTaskDataTy &Data) { 5019 ASTContext &C = CGM.getContext(); 5020 llvm::SmallVector<PrivateDataTy, 4> Privates; 5021 // Aggregate privates and sort them by the alignment. 5022 auto I = Data.PrivateCopies.begin(); 5023 for (const Expr *E : Data.PrivateVars) { 5024 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5025 Privates.emplace_back( 5026 C.getDeclAlign(VD), 5027 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5028 /*PrivateElemInit=*/nullptr)); 5029 ++I; 5030 } 5031 I = Data.FirstprivateCopies.begin(); 5032 auto IElemInitRef = Data.FirstprivateInits.begin(); 5033 for (const Expr *E : Data.FirstprivateVars) { 5034 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5035 Privates.emplace_back( 5036 C.getDeclAlign(VD), 5037 PrivateHelpersTy( 5038 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5039 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 5040 ++I; 5041 ++IElemInitRef; 5042 } 5043 I = Data.LastprivateCopies.begin(); 5044 for (const Expr *E : Data.LastprivateVars) { 5045 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5046 Privates.emplace_back( 5047 C.getDeclAlign(VD), 5048 PrivateHelpersTy(VD, 
cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5049 /*PrivateElemInit=*/nullptr)); 5050 ++I; 5051 } 5052 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 5053 return L.first > R.first; 5054 }); 5055 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 5056 // Build type kmp_routine_entry_t (if not built yet). 5057 emitKmpRoutineEntryT(KmpInt32Ty); 5058 // Build type kmp_task_t (if not built yet). 5059 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 5060 if (SavedKmpTaskloopTQTy.isNull()) { 5061 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5062 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5063 } 5064 KmpTaskTQTy = SavedKmpTaskloopTQTy; 5065 } else { 5066 assert((D.getDirectiveKind() == OMPD_task || 5067 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 5068 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 5069 "Expected taskloop, task or target directive"); 5070 if (SavedKmpTaskTQTy.isNull()) { 5071 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5072 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5073 } 5074 KmpTaskTQTy = SavedKmpTaskTQTy; 5075 } 5076 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5077 // Build particular struct kmp_task_t for the given task. 
5078 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5079 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5080 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5081 QualType KmpTaskTWithPrivatesPtrQTy = 5082 C.getPointerType(KmpTaskTWithPrivatesQTy); 5083 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5084 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5085 KmpTaskTWithPrivatesTy->getPointerTo(); 5086 llvm::Value *KmpTaskTWithPrivatesTySize = 5087 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5088 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5089 5090 // Emit initial values for private copies (if any). 5091 llvm::Value *TaskPrivatesMap = nullptr; 5092 llvm::Type *TaskPrivatesMapTy = 5093 std::next(TaskFunction->arg_begin(), 3)->getType(); 5094 if (!Privates.empty()) { 5095 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5096 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5097 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5098 FI->getType(), Privates); 5099 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5100 TaskPrivatesMap, TaskPrivatesMapTy); 5101 } else { 5102 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5103 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5104 } 5105 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5106 // kmp_task_t *tt); 5107 llvm::Function *TaskEntry = emitProxyTaskFunction( 5108 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5109 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5110 TaskPrivatesMap); 5111 5112 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5113 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5114 // kmp_routine_entry_t *task_entry); 5115 // Task flags. 
Format is taken from 5116 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5117 // description of kmp_tasking_flags struct. 5118 enum { 5119 TiedFlag = 0x1, 5120 FinalFlag = 0x2, 5121 DestructorsFlag = 0x8, 5122 PriorityFlag = 0x20 5123 }; 5124 unsigned Flags = Data.Tied ? TiedFlag : 0; 5125 bool NeedsCleanup = false; 5126 if (!Privates.empty()) { 5127 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5128 if (NeedsCleanup) 5129 Flags = Flags | DestructorsFlag; 5130 } 5131 if (Data.Priority.getInt()) 5132 Flags = Flags | PriorityFlag; 5133 llvm::Value *TaskFlags = 5134 Data.Final.getPointer() 5135 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 5136 CGF.Builder.getInt32(FinalFlag), 5137 CGF.Builder.getInt32(/*C=*/0)) 5138 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5139 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5140 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5141 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 5142 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 5143 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5144 TaskEntry, KmpRoutineEntryPtrTy)}; 5145 llvm::Value *NewTask; 5146 if (D.hasClausesOfKind<OMPNowaitClause>()) { 5147 // Check if we have any device clause associated with the directive. 5148 const Expr *Device = nullptr; 5149 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 5150 Device = C->getDevice(); 5151 // Emit device ID if any otherwise use default value. 
5152 llvm::Value *DeviceID; 5153 if (Device) 5154 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5155 CGF.Int64Ty, /*isSigned=*/true); 5156 else 5157 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 5158 AllocArgs.push_back(DeviceID); 5159 NewTask = CGF.EmitRuntimeCall( 5160 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); 5161 } else { 5162 NewTask = CGF.EmitRuntimeCall( 5163 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5164 } 5165 llvm::Value *NewTaskNewTaskTTy = 5166 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5167 NewTask, KmpTaskTWithPrivatesPtrTy); 5168 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5169 KmpTaskTWithPrivatesQTy); 5170 LValue TDBase = 5171 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5172 // Fill the data in the resulting kmp_task_t record. 5173 // Copy shareds if there are any. 5174 Address KmpTaskSharedsPtr = Address::invalid(); 5175 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5176 KmpTaskSharedsPtr = 5177 Address(CGF.EmitLoadOfScalar( 5178 CGF.EmitLValueForField( 5179 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5180 KmpTaskTShareds)), 5181 Loc), 5182 CGF.getNaturalTypeAlignment(SharedsTy)); 5183 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5184 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5185 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5186 } 5187 // Emit initial values for private copies (if any). 
5188 TaskResultTy Result; 5189 if (!Privates.empty()) { 5190 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5191 SharedsTy, SharedsPtrTy, Data, Privates, 5192 /*ForDup=*/false); 5193 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5194 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5195 Result.TaskDupFn = emitTaskDupFunction( 5196 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5197 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5198 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5199 } 5200 } 5201 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5202 enum { Priority = 0, Destructors = 1 }; 5203 // Provide pointer to function with destructors for privates. 5204 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5205 const RecordDecl *KmpCmplrdataUD = 5206 (*FI)->getType()->getAsUnionType()->getDecl(); 5207 if (NeedsCleanup) { 5208 llvm::Value *DestructorFn = emitDestructorsFunction( 5209 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5210 KmpTaskTWithPrivatesQTy); 5211 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5212 LValue DestructorsLV = CGF.EmitLValueForField( 5213 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5214 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5215 DestructorFn, KmpRoutineEntryPtrTy), 5216 DestructorsLV); 5217 } 5218 // Set priority. 
5219 if (Data.Priority.getInt()) { 5220 LValue Data2LV = CGF.EmitLValueForField( 5221 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5222 LValue PriorityLV = CGF.EmitLValueForField( 5223 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5224 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5225 } 5226 Result.NewTask = NewTask; 5227 Result.TaskEntry = TaskEntry; 5228 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5229 Result.TDBase = TDBase; 5230 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5231 return Result; 5232 } 5233 5234 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5235 const OMPExecutableDirective &D, 5236 llvm::Function *TaskFunction, 5237 QualType SharedsTy, Address Shareds, 5238 const Expr *IfCond, 5239 const OMPTaskDataTy &Data) { 5240 if (!CGF.HaveInsertPoint()) 5241 return; 5242 5243 TaskResultTy Result = 5244 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5245 llvm::Value *NewTask = Result.NewTask; 5246 llvm::Function *TaskEntry = Result.TaskEntry; 5247 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5248 LValue TDBase = Result.TDBase; 5249 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5250 ASTContext &C = CGM.getContext(); 5251 // Process list of dependences. 5252 Address DependenciesArray = Address::invalid(); 5253 unsigned NumDependencies = Data.Dependences.size(); 5254 if (NumDependencies) { 5255 // Dependence kind for RTL. 
5256 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5257 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5258 RecordDecl *KmpDependInfoRD; 5259 QualType FlagsTy = 5260 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5261 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5262 if (KmpDependInfoTy.isNull()) { 5263 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5264 KmpDependInfoRD->startDefinition(); 5265 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5266 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5267 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5268 KmpDependInfoRD->completeDefinition(); 5269 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5270 } else { 5271 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5272 } 5273 // Define type kmp_depend_info[<Dependences.size()>]; 5274 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5275 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5276 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5277 // kmp_depend_info[<Dependences.size()>] deps; 5278 DependenciesArray = 5279 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5280 for (unsigned I = 0; I < NumDependencies; ++I) { 5281 const Expr *E = Data.Dependences[I].second; 5282 LValue Addr = CGF.EmitLValue(E); 5283 llvm::Value *Size; 5284 QualType Ty = E->getType(); 5285 if (const auto *ASE = 5286 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5287 LValue UpAddrLVal = 5288 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 5289 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( 5290 UpAddrLVal.getPointer(CGF), /*Idx0=*/1); 5291 llvm::Value *LowIntPtr = 5292 CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGM.SizeTy); 5293 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5294 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5295 } else { 5296 Size = 
CGF.getTypeSize(Ty); 5297 } 5298 LValue Base = CGF.MakeAddrLValue( 5299 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5300 KmpDependInfoTy); 5301 // deps[i].base_addr = &<Dependences[i].second>; 5302 LValue BaseAddrLVal = CGF.EmitLValueForField( 5303 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5304 CGF.EmitStoreOfScalar( 5305 CGF.Builder.CreatePtrToInt(Addr.getPointer(CGF), CGF.IntPtrTy), 5306 BaseAddrLVal); 5307 // deps[i].len = sizeof(<Dependences[i].second>); 5308 LValue LenLVal = CGF.EmitLValueForField( 5309 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5310 CGF.EmitStoreOfScalar(Size, LenLVal); 5311 // deps[i].flags = <Dependences[i].first>; 5312 RTLDependenceKindTy DepKind; 5313 switch (Data.Dependences[I].first) { 5314 case OMPC_DEPEND_in: 5315 DepKind = DepIn; 5316 break; 5317 // Out and InOut dependencies must use the same code. 5318 case OMPC_DEPEND_out: 5319 case OMPC_DEPEND_inout: 5320 DepKind = DepInOut; 5321 break; 5322 case OMPC_DEPEND_mutexinoutset: 5323 DepKind = DepMutexInOutSet; 5324 break; 5325 case OMPC_DEPEND_source: 5326 case OMPC_DEPEND_sink: 5327 case OMPC_DEPEND_unknown: 5328 llvm_unreachable("Unknown task dependence type"); 5329 } 5330 LValue FlagsLVal = CGF.EmitLValueForField( 5331 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5332 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5333 FlagsLVal); 5334 } 5335 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5336 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); 5337 } 5338 5339 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5340 // libcall. 
5341 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5342 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5343 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5344 // list is not empty 5345 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5346 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5347 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5348 llvm::Value *DepTaskArgs[7]; 5349 if (NumDependencies) { 5350 DepTaskArgs[0] = UpLoc; 5351 DepTaskArgs[1] = ThreadID; 5352 DepTaskArgs[2] = NewTask; 5353 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 5354 DepTaskArgs[4] = DependenciesArray.getPointer(); 5355 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5356 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5357 } 5358 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 5359 &TaskArgs, 5360 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5361 if (!Data.Tied) { 5362 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5363 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5364 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5365 } 5366 if (NumDependencies) { 5367 CGF.EmitRuntimeCall( 5368 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5369 } else { 5370 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5371 TaskArgs); 5372 } 5373 // Check if parent region is untied and build return for untied task; 5374 if (auto *Region = 5375 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5376 Region->emitUntiedSwitch(CGF); 5377 }; 5378 5379 llvm::Value *DepWaitTaskArgs[6]; 5380 if (NumDependencies) { 5381 DepWaitTaskArgs[0] = UpLoc; 5382 DepWaitTaskArgs[1] = ThreadID; 5383 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 5384 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5385 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5386 
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emits the call to the __kmpc_taskloop runtime entry for the loop directive
/// \p D.
///
/// The task is first set up through emitTaskInit(). The initializers of the
/// lower bound, upper bound and stride variables of \p D are then stored into
/// the corresponding fields of the task descriptor, the reductions field is
/// filled from \p Data (or zero-initialized), and finally the runtime function
/// is called. Nothing is emitted if \p CGF has no insertion point.
///
/// \param CGF Function in which the call is emitted.
/// \param Loc Location used for the emitted ident_t and thread id lookup.
/// \param D Loop directive providing the lower bound, upper bound and stride
/// variables.
/// \param TaskFunction, SharedsTy, Shareds Forwarded unchanged to
/// emitTaskInit().
/// \param IfCond Optional condition; when null the constant 1 is passed as the
/// 'if_val' argument.
/// \param Data Task data; its Reductions and Schedule members are read here.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // 'if_val' argument: the evaluated condition widened to int, or the constant
  // 1 when the directive has no usable 'if' condition.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Store the initializer of the lower bound variable into the lower bound
  // field of the task descriptor.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Values passed as the 'sched' argument. The pointer part of Data.Schedule
  // is the schedule value (absent => NoSchedule); its integer part selects
  // NumTasks (non-zero) over Grainsize (zero).
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // Schedule value zero-extended/truncated to 64 bits, or 0 when absent.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // Task duplication routine, or null when emitTaskInit() produced none.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (remapped to the current element of the LHS array while the operation is
/// generated).
/// \param RHSVar Variable on the right side of the reduction operation
/// (remapped to the current element of the RHS array while the operation is
/// generated).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions passed through unchanged
/// to \p RedOpGen.
5522 static void EmitOMPAggregateReduction( 5523 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5524 const VarDecl *RHSVar, 5525 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5526 const Expr *, const Expr *)> &RedOpGen, 5527 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5528 const Expr *UpExpr = nullptr) { 5529 // Perform element-by-element initialization. 5530 QualType ElementTy; 5531 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5532 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5533 5534 // Drill down to the base element type on both arrays. 5535 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5536 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5537 5538 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5539 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5540 // Cast from pointer to array type to pointer to single element. 5541 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5542 // The basic structure here is a while-do loop. 5543 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5544 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5545 llvm::Value *IsEmpty = 5546 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5547 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5548 5549 // Enter the loop body, making that address the current address. 
5550 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5551 CGF.EmitBlock(BodyBB); 5552 5553 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5554 5555 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5556 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5557 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5558 Address RHSElementCurrent = 5559 Address(RHSElementPHI, 5560 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5561 5562 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5563 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5564 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5565 Address LHSElementCurrent = 5566 Address(LHSElementPHI, 5567 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5568 5569 // Emit copy. 5570 CodeGenFunction::OMPPrivateScope Scope(CGF); 5571 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5572 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5573 Scope.Privatize(); 5574 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5575 Scope.ForceCleanup(); 5576 5577 // Shift the address forward by one element. 5578 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5579 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5580 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5581 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5582 // Check whether we've reached the end. 5583 llvm::Value *Done = 5584 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5585 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5586 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5587 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5588 5589 // Done. 5590 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5591 } 5592 5593 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize a call whose callee is an opaque value wrapping a reference to
  // an OMPDeclareReductionDecl.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          // Bind the opaque callee to the first function of the pair returned
          // for this declaration, then emit the call expression itself.
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Anything else is emitted as a plain expression.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the internal helper 'void reduction_func(void *LHSArg, void *RHSArg)'.
///
/// Both arguments are cast to \p ArgsType and used as arrays of pointers. The
/// variables referenced by \p LHSExprs and \p RHSExprs are remapped to
/// addresses obtained from these arrays (via emitAddrOfVarFromArray()), and
/// every expression in \p ReductionOps is then emitted, element-wise for
/// array-typed private copies.
///
/// \param Loc Location used for the implicit parameters and the function.
/// \param ArgsType LLVM type both arguments are cast to.
/// \param Privates Private copies; only their types are inspected here.
/// \param LHSExprs, RHSExprs DeclRefExprs naming the left/right hand side
/// variables of each reduction operation.
/// \param ReductionOps Reduction operations, one per reduction item.
/// \return The created function (internal linkage).
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the pointer arrays; it runs ahead of I because items of
  // variably modified type occupy an extra slot holding the array size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size is read from the next slot of the LHS array, where it is
      // stored as a pointer-sized value, and bound to the VLA size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the reduction operation for every item.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner \p ReductionOp for a single reduction item.
///
/// When the type of \p PrivateRef is an array type, the combiner is emitted
/// once per array element with the variables of \p LHS and \p RHS remapped to
/// the current elements; otherwise it is emitted once as is.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code that combines the private reduction copies into the
/// original reduction items (see the pseudo code inside the function).
///
/// With Options.SimpleReduction set only the combiners are emitted. Otherwise
/// a list of pointers to the RHS items is built, the helper reduce_func() is
/// generated, __kmpc_reduce{_nowait} is called and a switch over its result
/// selects between the plain combiners (case 1) and atomic or critical-region
/// protected combiners (case 2). Nothing is emitted if \p CGF has no
/// insertion point.
///
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs, RHSExprs DeclRefExprs used as left/right hand sides of
/// the reduction operations.
/// \param ReductionOps Reduction operations, one per item.
/// \param Options WithNowait selects the *_nowait runtime entries,
/// SimpleReduction selects the combiner-only form.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it advances twice for items of variably
  // modified type (pointer slot followed by size slot).
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is smuggled through the void* slot via inttoptr.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note that <n> is the number of reduction items (RHSExprs.size()), while
  // sizeof(RedList) also covers the extra size slots reserved above.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // The Action parameter is not used by this generator; the action installed
  // below is constructed without an enter callee.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose 'x = <update>' into its parts. XExpr stays null when the
      // reduction operation is not a plain assignment.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the pointer declared in the condition below is also named 'BO'
      // and shadows the BinaryOperatorKind above inside this if-statement only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback evaluates UpExpr with VD remapped to a temporary that
          // holds the value passed in as XRValue.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // With 'nowait' no end call is emitted for the atomic case.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 6032 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 6033 const Expr *Ref) { 6034 SmallString<256> Buffer; 6035 llvm::raw_svector_ostream Out(Buffer); 6036 const clang::DeclRefExpr *DE; 6037 const VarDecl *D = ::getBaseDecl(Ref, DE); 6038 if (!D) 6039 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 6040 D = D->getCanonicalDecl(); 6041 std::string Name = CGM.getOpenMPRuntime().getName( 6042 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 6043 Out << Prefix << Name << "_" 6044 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 6045 return Out.str(); 6046 } 6047 6048 /// Emits reduction initializer function: 6049 /// \code 6050 /// void @.red_init(void* %arg) { 6051 /// %0 = bitcast void* %arg to <type>* 6052 /// store <type> <init>, <type>* %0 6053 /// ret void 6054 /// } 6055 /// \endcode 6056 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6057 SourceLocation Loc, 6058 ReductionCodeGen &RCG, unsigned N) { 6059 ASTContext &C = CGM.getContext(); 6060 FunctionArgList Args; 6061 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6062 ImplicitParamDecl::Other); 6063 Args.emplace_back(&Param); 6064 const auto &FnInfo = 6065 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6066 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6067 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6068 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6069 Name, &CGM.getModule()); 6070 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6071 Fn->setDoesNotRecurse(); 6072 CodeGenFunction CGF(CGM); 6073 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6074 Address PrivateAddr = CGF.EmitLoadOfPointer( 6075 CGF.GetAddrOfLocalVar(&Param), 6076 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6077 llvm::Value *Size = nullptr; 6078 
// If the size of the reduction item is non-constant, load it from global 6079 // threadprivate variable. 6080 if (RCG.getSizes(N).second) { 6081 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6082 CGF, CGM.getContext().getSizeType(), 6083 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6084 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6085 CGM.getContext().getSizeType(), Loc); 6086 } 6087 RCG.emitAggregateType(CGF, N, Size); 6088 LValue SharedLVal; 6089 // If initializer uses initializer from declare reduction construct, emit a 6090 // pointer to the address of the original reduction item (reuired by reduction 6091 // initializer) 6092 if (RCG.usesReductionInitializer(N)) { 6093 Address SharedAddr = 6094 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6095 CGF, CGM.getContext().VoidPtrTy, 6096 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6097 SharedAddr = CGF.EmitLoadOfPointer( 6098 SharedAddr, 6099 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 6100 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 6101 } else { 6102 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 6103 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 6104 CGM.getContext().VoidPtrTy); 6105 } 6106 // Emit the initializer: 6107 // %0 = bitcast void* %arg to <type>* 6108 // store <type> <init>, <type>* %0 6109 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 6110 [](CodeGenFunction &) { return false; }); 6111 CGF.FinishFunction(); 6112 return Fn; 6113 } 6114 6115 /// Emits reduction combiner function: 6116 /// \code 6117 /// void @.red_comb(void* %arg0, void* %arg1) { 6118 /// %lhs = bitcast void* %arg0 to <type>* 6119 /// %rhs = bitcast void* %arg1 to <type>* 6120 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6121 /// store <type> %2, <type>* %lhs 6122 /// ret void 6123 /// } 6124 /// \endcode 6125 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 6126 
SourceLocation Loc, 6127 ReductionCodeGen &RCG, unsigned N, 6128 const Expr *ReductionOp, 6129 const Expr *LHS, const Expr *RHS, 6130 const Expr *PrivateRef) { 6131 ASTContext &C = CGM.getContext(); 6132 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6133 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6134 FunctionArgList Args; 6135 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6136 C.VoidPtrTy, ImplicitParamDecl::Other); 6137 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6138 ImplicitParamDecl::Other); 6139 Args.emplace_back(&ParamInOut); 6140 Args.emplace_back(&ParamIn); 6141 const auto &FnInfo = 6142 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6143 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6144 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6145 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6146 Name, &CGM.getModule()); 6147 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6148 Fn->setDoesNotRecurse(); 6149 CodeGenFunction CGF(CGM); 6150 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6151 llvm::Value *Size = nullptr; 6152 // If the size of the reduction item is non-constant, load it from global 6153 // threadprivate variable. 6154 if (RCG.getSizes(N).second) { 6155 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6156 CGF, CGM.getContext().getSizeType(), 6157 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6158 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6159 CGM.getContext().getSizeType(), Loc); 6160 } 6161 RCG.emitAggregateType(CGF, N, Size); 6162 // Remap lhs and rhs variables to the addresses of the function arguments. 
6163 // %lhs = bitcast void* %arg0 to <type>* 6164 // %rhs = bitcast void* %arg1 to <type>* 6165 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6166 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6167 // Pull out the pointer to the variable. 6168 Address PtrAddr = CGF.EmitLoadOfPointer( 6169 CGF.GetAddrOfLocalVar(&ParamInOut), 6170 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6171 return CGF.Builder.CreateElementBitCast( 6172 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6173 }); 6174 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6175 // Pull out the pointer to the variable. 6176 Address PtrAddr = CGF.EmitLoadOfPointer( 6177 CGF.GetAddrOfLocalVar(&ParamIn), 6178 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6179 return CGF.Builder.CreateElementBitCast( 6180 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6181 }); 6182 PrivateScope.Privatize(); 6183 // Emit the combiner body: 6184 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6185 // store <type> %2, <type>* %lhs 6186 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6187 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6188 cast<DeclRefExpr>(RHS)); 6189 CGF.FinishFunction(); 6190 return Fn; 6191 } 6192 6193 /// Emits reduction finalizer function: 6194 /// \code 6195 /// void @.red_fini(void* %arg) { 6196 /// %0 = bitcast void* %arg to <type>* 6197 /// <destroy>(<type>* %0) 6198 /// ret void 6199 /// } 6200 /// \endcode 6201 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6202 SourceLocation Loc, 6203 ReductionCodeGen &RCG, unsigned N) { 6204 if (!RCG.needCleanups(N)) 6205 return nullptr; 6206 ASTContext &C = CGM.getContext(); 6207 FunctionArgList Args; 6208 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6209 ImplicitParamDecl::Other); 6210 Args.emplace_back(&Param); 6211 const auto &FnInfo = 6212 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6213 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 6214 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6215 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6216 Name, &CGM.getModule()); 6217 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6218 Fn->setDoesNotRecurse(); 6219 CodeGenFunction CGF(CGM); 6220 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6221 Address PrivateAddr = CGF.EmitLoadOfPointer( 6222 CGF.GetAddrOfLocalVar(&Param), 6223 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6224 llvm::Value *Size = nullptr; 6225 // If the size of the reduction item is non-constant, load it from global 6226 // threadprivate variable. 6227 if (RCG.getSizes(N).second) { 6228 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6229 CGF, CGM.getContext().getSizeType(), 6230 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6231 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6232 CGM.getContext().getSizeType(), Loc); 6233 } 6234 RCG.emitAggregateType(CGF, N, Size); 6235 // Emit the finalizer body: 6236 // <destroy>(<type>* %0) 6237 RCG.emitCleanups(CGF, N, PrivateAddr); 6238 CGF.FinishFunction(Loc); 6239 return Fn; 6240 } 6241 6242 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6243 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6244 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6245 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6246 return nullptr; 6247 6248 // Build typedef struct: 6249 // kmp_task_red_input { 6250 // void *reduce_shar; // shared reduction item 6251 // size_t reduce_size; // size of data item 6252 // void *reduce_init; // data initialization routine 6253 // void *reduce_fini; // data finalization routine 6254 // void *reduce_comb; // data combiner routine 6255 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6256 // } kmp_task_red_input_t; 6257 
ASTContext &C = CGM.getContext(); 6258 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6259 RD->startDefinition(); 6260 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6261 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6262 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6263 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6264 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6265 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6266 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6267 RD->completeDefinition(); 6268 QualType RDType = C.getRecordType(RD); 6269 unsigned Size = Data.ReductionVars.size(); 6270 llvm::APInt ArraySize(/*numBits=*/64, Size); 6271 QualType ArrayRDType = C.getConstantArrayType( 6272 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6273 // kmp_task_red_input_t .rd_input.[Size]; 6274 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6275 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6276 Data.ReductionOps); 6277 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6278 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6279 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6280 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6281 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6282 TaskRedInput.getPointer(), Idxs, 6283 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6284 ".rd_input.gep."); 6285 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6286 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6287 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6288 RCG.emitSharedLValue(CGF, Cnt); 6289 llvm::Value *CastedShared = 6290 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); 6291 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6292 RCG.emitAggregateType(CGF, Cnt); 6293 llvm::Value *SizeValInChars; 6294 
llvm::Value *SizeVal; 6295 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); 6296 // We use delayed creation/initialization for VLAs, array sections and 6297 // custom reduction initializations. It is required because runtime does not 6298 // provide the way to pass the sizes of VLAs/array sections to 6299 // initializer/combiner/finalizer functions and does not pass the pointer to 6300 // original reduction item to the initializer. Instead threadprivate global 6301 // variables are used to store these values and use them in the functions. 6302 bool DelayedCreation = !!SizeVal; 6303 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, 6304 /*isSigned=*/false); 6305 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); 6306 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); 6307 // ElemLVal.reduce_init = init; 6308 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); 6309 llvm::Value *InitAddr = 6310 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); 6311 CGF.EmitStoreOfScalar(InitAddr, InitLVal); 6312 DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt); 6313 // ElemLVal.reduce_fini = fini; 6314 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); 6315 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); 6316 llvm::Value *FiniAddr = Fini 6317 ? 
CGF.EmitCastToVoidPtr(Fini) 6318 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); 6319 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); 6320 // ElemLVal.reduce_comb = comb; 6321 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); 6322 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( 6323 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], 6324 RHSExprs[Cnt], Data.ReductionCopies[Cnt])); 6325 CGF.EmitStoreOfScalar(CombAddr, CombLVal); 6326 // ElemLVal.flags = 0; 6327 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); 6328 if (DelayedCreation) { 6329 CGF.EmitStoreOfScalar( 6330 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), 6331 FlagsLVal); 6332 } else 6333 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), 6334 FlagsLVal.getType()); 6335 } 6336 // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void 6337 // *data); 6338 llvm::Value *Args[] = { 6339 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6340 /*isSigned=*/true), 6341 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), 6342 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), 6343 CGM.VoidPtrTy)}; 6344 return CGF.EmitRuntimeCall( 6345 createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args); 6346 } 6347 6348 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 6349 SourceLocation Loc, 6350 ReductionCodeGen &RCG, 6351 unsigned N) { 6352 auto Sizes = RCG.getSizes(N); 6353 // Emit threadprivate global variable if the type is non-constant 6354 // (Sizes.second = nullptr). 
6355 if (Sizes.second) { 6356 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6357 /*isSigned=*/false); 6358 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6359 CGF, CGM.getContext().getSizeType(), 6360 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6361 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6362 } 6363 // Store address of the original reduction item if custom initializer is used. 6364 if (RCG.usesReductionInitializer(N)) { 6365 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6366 CGF, CGM.getContext().VoidPtrTy, 6367 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6368 CGF.Builder.CreateStore( 6369 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6370 RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy), 6371 SharedAddr, /*IsVolatile=*/false); 6372 } 6373 } 6374 6375 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6376 SourceLocation Loc, 6377 llvm::Value *ReductionsPtr, 6378 LValue SharedLVal) { 6379 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6380 // *d); 6381 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), 6382 CGM.IntTy, 6383 /*isSigned=*/true), 6384 ReductionsPtr, 6385 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6386 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; 6387 return Address( 6388 CGF.EmitRuntimeCall( 6389 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6390 SharedLVal.getAlignment()); 6391 } 6392 6393 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6394 SourceLocation Loc) { 6395 if (!CGF.HaveInsertPoint()) 6396 return; 6397 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6398 // global_tid); 6399 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6400 // Ignore return result until untied tasks are supported. 
6401 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6402 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6403 Region->emitUntiedSwitch(CGF); 6404 } 6405 6406 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6407 OpenMPDirectiveKind InnerKind, 6408 const RegionCodeGenTy &CodeGen, 6409 bool HasCancel) { 6410 if (!CGF.HaveInsertPoint()) 6411 return; 6412 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6413 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6414 } 6415 6416 namespace { 6417 enum RTCancelKind { 6418 CancelNoreq = 0, 6419 CancelParallel = 1, 6420 CancelLoop = 2, 6421 CancelSections = 3, 6422 CancelTaskgroup = 4 6423 }; 6424 } // anonymous namespace 6425 6426 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6427 RTCancelKind CancelKind = CancelNoreq; 6428 if (CancelRegion == OMPD_parallel) 6429 CancelKind = CancelParallel; 6430 else if (CancelRegion == OMPD_for) 6431 CancelKind = CancelLoop; 6432 else if (CancelRegion == OMPD_sections) 6433 CancelKind = CancelSections; 6434 else { 6435 assert(CancelRegion == OMPD_taskgroup); 6436 CancelKind = CancelTaskgroup; 6437 } 6438 return CancelKind; 6439 } 6440 6441 void CGOpenMPRuntime::emitCancellationPointCall( 6442 CodeGenFunction &CGF, SourceLocation Loc, 6443 OpenMPDirectiveKind CancelRegion) { 6444 if (!CGF.HaveInsertPoint()) 6445 return; 6446 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6447 // global_tid, kmp_int32 cncl_kind); 6448 if (auto *OMPRegionInfo = 6449 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6450 // For 'cancellation point taskgroup', the task region info may not have a 6451 // cancel. This may instead happen in another adjacent task. 
6452 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6453 llvm::Value *Args[] = { 6454 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6455 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6456 // Ignore return result until untied tasks are supported. 6457 llvm::Value *Result = CGF.EmitRuntimeCall( 6458 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6459 // if (__kmpc_cancellationpoint()) { 6460 // exit from construct; 6461 // } 6462 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6463 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6464 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6465 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6466 CGF.EmitBlock(ExitBB); 6467 // exit from construct; 6468 CodeGenFunction::JumpDest CancelDest = 6469 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6470 CGF.EmitBranchThroughCleanup(CancelDest); 6471 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6472 } 6473 } 6474 } 6475 6476 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6477 const Expr *IfCond, 6478 OpenMPDirectiveKind CancelRegion) { 6479 if (!CGF.HaveInsertPoint()) 6480 return; 6481 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6482 // kmp_int32 cncl_kind); 6483 if (auto *OMPRegionInfo = 6484 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6485 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6486 PrePostActionTy &) { 6487 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6488 llvm::Value *Args[] = { 6489 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6490 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6491 // Ignore return result until untied tasks are supported. 
6492 llvm::Value *Result = CGF.EmitRuntimeCall( 6493 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6494 // if (__kmpc_cancel()) { 6495 // exit from construct; 6496 // } 6497 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6498 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6499 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6500 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6501 CGF.EmitBlock(ExitBB); 6502 // exit from construct; 6503 CodeGenFunction::JumpDest CancelDest = 6504 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6505 CGF.EmitBranchThroughCleanup(CancelDest); 6506 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6507 }; 6508 if (IfCond) { 6509 emitIfClause(CGF, IfCond, ThenGen, 6510 [](CodeGenFunction &, PrePostActionTy &) {}); 6511 } else { 6512 RegionCodeGenTy ThenRCG(ThenGen); 6513 ThenRCG(CGF); 6514 } 6515 } 6516 } 6517 6518 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6519 const OMPExecutableDirective &D, StringRef ParentName, 6520 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6521 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6522 assert(!ParentName.empty() && "Invalid target region parent name!"); 6523 HasEmittedTargetRegion = true; 6524 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6525 IsOffloadEntry, CodeGen); 6526 } 6527 6528 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6529 const OMPExecutableDirective &D, StringRef ParentName, 6530 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6531 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6532 // Create a unique name for the entry function using the source location 6533 // information of the current target region. 
The name will be something like: 6534 // 6535 // __omp_offloading_DD_FFFF_PP_lBB 6536 // 6537 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6538 // mangled name of the function that encloses the target region and BB is the 6539 // line number of the target region. 6540 6541 unsigned DeviceID; 6542 unsigned FileID; 6543 unsigned Line; 6544 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6545 Line); 6546 SmallString<64> EntryFnName; 6547 { 6548 llvm::raw_svector_ostream OS(EntryFnName); 6549 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6550 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6551 } 6552 6553 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6554 6555 CodeGenFunction CGF(CGM, true); 6556 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6557 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6558 6559 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 6560 6561 // If this target outline function is not an offload entry, we don't need to 6562 // register it. 6563 if (!IsOffloadEntry) 6564 return; 6565 6566 // The target region ID is used by the runtime library to identify the current 6567 // target region, so it only has to be unique and not necessarily point to 6568 // anything. It could be the pointer to the outlined function that implements 6569 // the target region, but we aren't using that so that the compiler doesn't 6570 // need to keep that, and could therefore inline the host function if proven 6571 // worthwhile during optimization. In the other hand, if emitting code for the 6572 // device, the ID has to be the function address so that it can retrieved from 6573 // the offloading entry and launched by the runtime library. We also mark the 6574 // outlined function to have external linkage in case we are emitting code for 6575 // the device, because these functions will be entry points to the device. 
6576 6577 if (CGM.getLangOpts().OpenMPIsDevice) { 6578 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6579 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6580 OutlinedFn->setDSOLocal(false); 6581 } else { 6582 std::string Name = getName({EntryFnName, "region_id"}); 6583 OutlinedFnID = new llvm::GlobalVariable( 6584 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6585 llvm::GlobalValue::WeakAnyLinkage, 6586 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6587 } 6588 6589 // Register the information for the entry associated with this target region. 6590 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6591 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6592 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6593 } 6594 6595 /// Checks if the expression is constant or does not have non-trivial function 6596 /// calls. 6597 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6598 // We can skip constant expressions. 6599 // We can skip expressions with trivial calls or simple expressions. 6600 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6601 !E->hasNonTrivialCall(Ctx)) && 6602 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6603 } 6604 6605 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6606 const Stmt *Body) { 6607 const Stmt *Child = Body->IgnoreContainers(); 6608 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6609 Child = nullptr; 6610 for (const Stmt *S : C->body()) { 6611 if (const auto *E = dyn_cast<Expr>(S)) { 6612 if (isTrivial(Ctx, E)) 6613 continue; 6614 } 6615 // Some of the statements can be ignored. 6616 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6617 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6618 continue; 6619 // Analyze declarations. 
6620 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6621 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6622 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6623 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6624 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6625 isa<UsingDirectiveDecl>(D) || 6626 isa<OMPDeclareReductionDecl>(D) || 6627 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6628 return true; 6629 const auto *VD = dyn_cast<VarDecl>(D); 6630 if (!VD) 6631 return false; 6632 return VD->isConstexpr() || 6633 ((VD->getType().isTrivialType(Ctx) || 6634 VD->getType()->isReferenceType()) && 6635 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6636 })) 6637 continue; 6638 } 6639 // Found multiple children - cannot get the one child only. 6640 if (Child) 6641 return nullptr; 6642 Child = S; 6643 } 6644 if (Child) 6645 Child = Child->IgnoreContainers(); 6646 } 6647 return Child; 6648 } 6649 6650 /// Emit the number of teams for a target directive. Inspect the num_teams 6651 /// clause associated with a teams construct combined or closely nested 6652 /// with the target directive. 6653 /// 6654 /// Emit a team of size one for directives such as 'target parallel' that 6655 /// have no associated teams construct. 6656 /// 6657 /// Otherwise, return nullptr. 
6658 static llvm::Value * 6659 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6660 const OMPExecutableDirective &D) { 6661 assert(!CGF.getLangOpts().OpenMPIsDevice && 6662 "Clauses associated with the teams directive expected to be emitted " 6663 "only for the host!"); 6664 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6665 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6666 "Expected target-based executable directive."); 6667 CGBuilderTy &Bld = CGF.Builder; 6668 switch (DirectiveKind) { 6669 case OMPD_target: { 6670 const auto *CS = D.getInnermostCapturedStmt(); 6671 const auto *Body = 6672 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6673 const Stmt *ChildStmt = 6674 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6675 if (const auto *NestedDir = 6676 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6677 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6678 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6679 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6680 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6681 const Expr *NumTeams = 6682 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6683 llvm::Value *NumTeamsVal = 6684 CGF.EmitScalarExpr(NumTeams, 6685 /*IgnoreResultAssign*/ true); 6686 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6687 /*isSigned=*/true); 6688 } 6689 return Bld.getInt32(0); 6690 } 6691 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6692 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6693 return Bld.getInt32(1); 6694 return Bld.getInt32(0); 6695 } 6696 return nullptr; 6697 } 6698 case OMPD_target_teams: 6699 case OMPD_target_teams_distribute: 6700 case OMPD_target_teams_distribute_simd: 6701 case OMPD_target_teams_distribute_parallel_for: 6702 case OMPD_target_teams_distribute_parallel_for_simd: { 6703 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6704 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6705 const Expr *NumTeams = 6706 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6707 llvm::Value *NumTeamsVal = 6708 CGF.EmitScalarExpr(NumTeams, 6709 /*IgnoreResultAssign*/ true); 6710 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6711 /*isSigned=*/true); 6712 } 6713 return Bld.getInt32(0); 6714 } 6715 case OMPD_target_parallel: 6716 case OMPD_target_parallel_for: 6717 case OMPD_target_parallel_for_simd: 6718 case OMPD_target_simd: 6719 return Bld.getInt32(1); 6720 case OMPD_parallel: 6721 case OMPD_for: 6722 case OMPD_parallel_for: 6723 case OMPD_parallel_master: 6724 case OMPD_parallel_sections: 6725 case OMPD_for_simd: 6726 case OMPD_parallel_for_simd: 6727 case OMPD_cancel: 6728 case OMPD_cancellation_point: 6729 case OMPD_ordered: 6730 case OMPD_threadprivate: 6731 case OMPD_allocate: 6732 case OMPD_task: 6733 case OMPD_simd: 6734 case OMPD_sections: 6735 case OMPD_section: 6736 case OMPD_single: 6737 case OMPD_master: 6738 case OMPD_critical: 6739 case OMPD_taskyield: 6740 case OMPD_barrier: 6741 case OMPD_taskwait: 6742 case OMPD_taskgroup: 6743 case OMPD_atomic: 6744 case OMPD_flush: 6745 case OMPD_teams: 6746 case OMPD_target_data: 6747 case OMPD_target_exit_data: 6748 case OMPD_target_enter_data: 6749 case OMPD_distribute: 6750 case OMPD_distribute_simd: 6751 case OMPD_distribute_parallel_for: 6752 case OMPD_distribute_parallel_for_simd: 6753 case OMPD_teams_distribute: 6754 case OMPD_teams_distribute_simd: 6755 case OMPD_teams_distribute_parallel_for: 6756 case OMPD_teams_distribute_parallel_for_simd: 6757 case OMPD_target_update: 6758 case OMPD_declare_simd: 6759 case OMPD_declare_variant: 6760 case OMPD_declare_target: 6761 case OMPD_end_declare_target: 6762 case OMPD_declare_reduction: 6763 case OMPD_declare_mapper: 6764 case OMPD_taskloop: 6765 case OMPD_taskloop_simd: 6766 case OMPD_master_taskloop: 6767 case OMPD_master_taskloop_simd: 6768 case OMPD_parallel_master_taskloop: 6769 case 
OMPD_parallel_master_taskloop_simd: 6770 case OMPD_requires: 6771 case OMPD_unknown: 6772 break; 6773 } 6774 llvm_unreachable("Unexpected directive kind."); 6775 } 6776 6777 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6778 llvm::Value *DefaultThreadLimitVal) { 6779 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6780 CGF.getContext(), CS->getCapturedStmt()); 6781 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6782 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6783 llvm::Value *NumThreads = nullptr; 6784 llvm::Value *CondVal = nullptr; 6785 // Handle if clause. If if clause present, the number of threads is 6786 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6787 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6788 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6789 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6790 const OMPIfClause *IfClause = nullptr; 6791 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6792 if (C->getNameModifier() == OMPD_unknown || 6793 C->getNameModifier() == OMPD_parallel) { 6794 IfClause = C; 6795 break; 6796 } 6797 } 6798 if (IfClause) { 6799 const Expr *Cond = IfClause->getCondition(); 6800 bool Result; 6801 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6802 if (!Result) 6803 return CGF.Builder.getInt32(1); 6804 } else { 6805 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6806 if (const auto *PreInit = 6807 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6808 for (const auto *I : PreInit->decls()) { 6809 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6810 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6811 } else { 6812 CodeGenFunction::AutoVarEmission Emission = 6813 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6814 CGF.EmitAutoVarCleanups(Emission); 6815 } 6816 } 6817 } 6818 CondVal = CGF.EvaluateExprAsBool(Cond); 6819 } 6820 } 6821 } 6822 // Check the value of num_threads clause iff if 
clause was not specified 6823 // or is not evaluated to false. 6824 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6825 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6826 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6827 const auto *NumThreadsClause = 6828 Dir->getSingleClause<OMPNumThreadsClause>(); 6829 CodeGenFunction::LexicalScope Scope( 6830 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6831 if (const auto *PreInit = 6832 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6833 for (const auto *I : PreInit->decls()) { 6834 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6835 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6836 } else { 6837 CodeGenFunction::AutoVarEmission Emission = 6838 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6839 CGF.EmitAutoVarCleanups(Emission); 6840 } 6841 } 6842 } 6843 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6844 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6845 /*isSigned=*/false); 6846 if (DefaultThreadLimitVal) 6847 NumThreads = CGF.Builder.CreateSelect( 6848 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6849 DefaultThreadLimitVal, NumThreads); 6850 } else { 6851 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6852 : CGF.Builder.getInt32(0); 6853 } 6854 // Process condition of the if clause. 6855 if (CondVal) { 6856 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6857 CGF.Builder.getInt32(1)); 6858 } 6859 return NumThreads; 6860 } 6861 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6862 return CGF.Builder.getInt32(1); 6863 return DefaultThreadLimitVal; 6864 } 6865 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6866 : CGF.Builder.getInt32(0); 6867 } 6868 6869 /// Emit the number of threads for a target directive. Inspect the 6870 /// thread_limit clause associated with a teams construct combined or closely 6871 /// nested with the target directive. 
6872 /// 6873 /// Emit the num_threads clause for directives such as 'target parallel' that 6874 /// have no associated teams construct. 6875 /// 6876 /// Otherwise, return nullptr. 6877 static llvm::Value * 6878 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6879 const OMPExecutableDirective &D) { 6880 assert(!CGF.getLangOpts().OpenMPIsDevice && 6881 "Clauses associated with the teams directive expected to be emitted " 6882 "only for the host!"); 6883 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6884 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6885 "Expected target-based executable directive."); 6886 CGBuilderTy &Bld = CGF.Builder; 6887 llvm::Value *ThreadLimitVal = nullptr; 6888 llvm::Value *NumThreadsVal = nullptr; 6889 switch (DirectiveKind) { 6890 case OMPD_target: { 6891 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6892 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6893 return NumThreads; 6894 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6895 CGF.getContext(), CS->getCapturedStmt()); 6896 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6897 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6898 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6899 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6900 const auto *ThreadLimitClause = 6901 Dir->getSingleClause<OMPThreadLimitClause>(); 6902 CodeGenFunction::LexicalScope Scope( 6903 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6904 if (const auto *PreInit = 6905 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6906 for (const auto *I : PreInit->decls()) { 6907 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6908 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6909 } else { 6910 CodeGenFunction::AutoVarEmission Emission = 6911 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6912 CGF.EmitAutoVarCleanups(Emission); 6913 } 6914 } 6915 } 6916 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6917 
ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6918 ThreadLimitVal = 6919 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6920 } 6921 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6922 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6923 CS = Dir->getInnermostCapturedStmt(); 6924 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6925 CGF.getContext(), CS->getCapturedStmt()); 6926 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6927 } 6928 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6929 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6930 CS = Dir->getInnermostCapturedStmt(); 6931 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6932 return NumThreads; 6933 } 6934 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6935 return Bld.getInt32(1); 6936 } 6937 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6938 } 6939 case OMPD_target_teams: { 6940 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6941 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6942 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6943 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6944 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6945 ThreadLimitVal = 6946 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6947 } 6948 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6949 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6950 return NumThreads; 6951 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6952 CGF.getContext(), CS->getCapturedStmt()); 6953 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6954 if (Dir->getDirectiveKind() == OMPD_distribute) { 6955 CS = Dir->getInnermostCapturedStmt(); 6956 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6957 return NumThreads; 6958 } 6959 } 6960 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6961 } 6962 case OMPD_target_teams_distribute: 6963 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6964 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6965 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6966 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6967 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6968 ThreadLimitVal = 6969 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6970 } 6971 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6972 case OMPD_target_parallel: 6973 case OMPD_target_parallel_for: 6974 case OMPD_target_parallel_for_simd: 6975 case OMPD_target_teams_distribute_parallel_for: 6976 case OMPD_target_teams_distribute_parallel_for_simd: { 6977 llvm::Value *CondVal = nullptr; 6978 // Handle if clause. If if clause present, the number of threads is 6979 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6980 if (D.hasClausesOfKind<OMPIfClause>()) { 6981 const OMPIfClause *IfClause = nullptr; 6982 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6983 if (C->getNameModifier() == OMPD_unknown || 6984 C->getNameModifier() == OMPD_parallel) { 6985 IfClause = C; 6986 break; 6987 } 6988 } 6989 if (IfClause) { 6990 const Expr *Cond = IfClause->getCondition(); 6991 bool Result; 6992 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6993 if (!Result) 6994 return Bld.getInt32(1); 6995 } else { 6996 CodeGenFunction::RunCleanupsScope Scope(CGF); 6997 CondVal = CGF.EvaluateExprAsBool(Cond); 6998 } 6999 } 7000 } 7001 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 7002 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 7003 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7004 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7005 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7006 ThreadLimitVal = 7007 Bld.CreateIntCast(ThreadLimit, 
CGF.Int32Ty, /*isSigned=*/false); 7008 } 7009 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7010 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7011 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7012 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7013 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7014 NumThreadsVal = 7015 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7016 ThreadLimitVal = ThreadLimitVal 7017 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7018 ThreadLimitVal), 7019 NumThreadsVal, ThreadLimitVal) 7020 : NumThreadsVal; 7021 } 7022 if (!ThreadLimitVal) 7023 ThreadLimitVal = Bld.getInt32(0); 7024 if (CondVal) 7025 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7026 return ThreadLimitVal; 7027 } 7028 case OMPD_target_teams_distribute_simd: 7029 case OMPD_target_simd: 7030 return Bld.getInt32(1); 7031 case OMPD_parallel: 7032 case OMPD_for: 7033 case OMPD_parallel_for: 7034 case OMPD_parallel_master: 7035 case OMPD_parallel_sections: 7036 case OMPD_for_simd: 7037 case OMPD_parallel_for_simd: 7038 case OMPD_cancel: 7039 case OMPD_cancellation_point: 7040 case OMPD_ordered: 7041 case OMPD_threadprivate: 7042 case OMPD_allocate: 7043 case OMPD_task: 7044 case OMPD_simd: 7045 case OMPD_sections: 7046 case OMPD_section: 7047 case OMPD_single: 7048 case OMPD_master: 7049 case OMPD_critical: 7050 case OMPD_taskyield: 7051 case OMPD_barrier: 7052 case OMPD_taskwait: 7053 case OMPD_taskgroup: 7054 case OMPD_atomic: 7055 case OMPD_flush: 7056 case OMPD_teams: 7057 case OMPD_target_data: 7058 case OMPD_target_exit_data: 7059 case OMPD_target_enter_data: 7060 case OMPD_distribute: 7061 case OMPD_distribute_simd: 7062 case OMPD_distribute_parallel_for: 7063 case OMPD_distribute_parallel_for_simd: 7064 case OMPD_teams_distribute: 7065 case OMPD_teams_distribute_simd: 7066 case OMPD_teams_distribute_parallel_for: 7067 case OMPD_teams_distribute_parallel_for_simd: 7068 
case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. The enumerators are combined with the bitwise operators
  /// enabled by LLVM_MARK_AS_BITMASK_ENUM below.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  ///
  /// \return the number of zero bits below the lowest set bit of
  /// OMP_MAP_MEMBER_OF, i.e. the shift amount needed to place a value in the
  /// MEMBER_OF bit field (48 for the mask 0xffff000000000000).
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    // Shift the mask right until its lowest set bit reaches bit 0, counting
    // the shifts.
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    /// Implicitly constructible from a plain llvm::Value*, in which case no
    /// device pointer declaration is recorded.
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    /// Dereferencing yields the wrapped base pointer value.
    llvm::Value *operator*() const { return Ptr; }
    /// Return the associated device pointer declaration (may be null).
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    /// Replace the associated device pointer declaration.
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  /// Array of base pointers (with optional device pointer declarations).
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  /// Array of IR values; used for both section pointers and sizes.
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  /// Array of map type flags, one per generated entry.
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  /// A default-constructed object has an invalid Base, which is how users of
  /// this struct tell that no element has been recorded yet.
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Information about one mappable expression: its component list, map type
  /// and modifiers, whether a device pointer has to be returned for it, and
  /// whether the map is implicit.
7182 struct MapInfo { 7183 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7184 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7185 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7186 bool ReturnDevicePointer = false; 7187 bool IsImplicit = false; 7188 7189 MapInfo() = default; 7190 MapInfo( 7191 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7192 OpenMPMapClauseKind MapType, 7193 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7194 bool ReturnDevicePointer, bool IsImplicit) 7195 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7196 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7197 }; 7198 7199 /// If use_device_ptr is used on a pointer which is a struct member and there 7200 /// is no map information about it, then emission of that entry is deferred 7201 /// until the whole struct has been processed. 7202 struct DeferredDevicePtrEntryTy { 7203 const Expr *IE = nullptr; 7204 const ValueDecl *VD = nullptr; 7205 7206 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7207 : IE(IE), VD(VD) {} 7208 }; 7209 7210 /// The target directive from where the mappable clauses were extracted. It 7211 /// is either a executable directive or a user-defined mapper directive. 7212 llvm::PointerUnion<const OMPExecutableDirective *, 7213 const OMPDeclareMapperDecl *> 7214 CurDir; 7215 7216 /// Function the directive is being generated for. 7217 CodeGenFunction &CGF; 7218 7219 /// Set of all first private variables in the current directive. 7220 /// bool data is set to true if the variable is implicitly marked as 7221 /// firstprivate, false otherwise. 7222 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7223 7224 /// Map between device pointer declarations and their expression components. 7225 /// The key value for declarations in 'this' is null. 
7226 llvm::DenseMap< 7227 const ValueDecl *, 7228 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7229 DevPointersMap; 7230 7231 llvm::Value *getExprTypeSize(const Expr *E) const { 7232 QualType ExprTy = E->getType().getCanonicalType(); 7233 7234 // Reference types are ignored for mapping purposes. 7235 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7236 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7237 7238 // Given that an array section is considered a built-in type, we need to 7239 // do the calculation based on the length of the section instead of relying 7240 // on CGF.getTypeSize(E->getType()). 7241 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7242 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7243 OAE->getBase()->IgnoreParenImpCasts()) 7244 .getCanonicalType(); 7245 7246 // If there is no length associated with the expression and lower bound is 7247 // not specified too, that means we are using the whole length of the 7248 // base. 7249 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7250 !OAE->getLowerBound()) 7251 return CGF.getTypeSize(BaseTy); 7252 7253 llvm::Value *ElemSize; 7254 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7255 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7256 } else { 7257 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7258 assert(ATy && "Expecting array type if not a pointer type."); 7259 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7260 } 7261 7262 // If we don't have a length at this point, that is because we have an 7263 // array section with a single element. 
7264 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7265 return ElemSize; 7266 7267 if (const Expr *LenExpr = OAE->getLength()) { 7268 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7269 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7270 CGF.getContext().getSizeType(), 7271 LenExpr->getExprLoc()); 7272 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7273 } 7274 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7275 OAE->getLowerBound() && "expected array_section[lb:]."); 7276 // Size = sizetype - lb * elemtype; 7277 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7278 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7279 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7280 CGF.getContext().getSizeType(), 7281 OAE->getLowerBound()->getExprLoc()); 7282 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7283 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7284 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7285 LengthVal = CGF.Builder.CreateSelect( 7286 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7287 return LengthVal; 7288 } 7289 return CGF.getTypeSize(ExprTy); 7290 } 7291 7292 /// Return the corresponding bits for a given map clause modifier. Add 7293 /// a flag marking the map as a pointer if requested. Add a flag marking the 7294 /// map as the first one of a series of maps that relate to the same map 7295 /// expression. 7296 OpenMPOffloadMappingFlags getMapTypeBits( 7297 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7298 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7299 OpenMPOffloadMappingFlags Bits = 7300 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7301 switch (MapType) { 7302 case OMPC_MAP_alloc: 7303 case OMPC_MAP_release: 7304 // alloc and release is the default behavior in the runtime library, i.e. 
7305 // if we don't pass any bits alloc/release that is what the runtime is 7306 // going to do. Therefore, we don't need to signal anything for these two 7307 // type modifiers. 7308 break; 7309 case OMPC_MAP_to: 7310 Bits |= OMP_MAP_TO; 7311 break; 7312 case OMPC_MAP_from: 7313 Bits |= OMP_MAP_FROM; 7314 break; 7315 case OMPC_MAP_tofrom: 7316 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7317 break; 7318 case OMPC_MAP_delete: 7319 Bits |= OMP_MAP_DELETE; 7320 break; 7321 case OMPC_MAP_unknown: 7322 llvm_unreachable("Unexpected map type!"); 7323 } 7324 if (AddPtrFlag) 7325 Bits |= OMP_MAP_PTR_AND_OBJ; 7326 if (AddIsTargetParamFlag) 7327 Bits |= OMP_MAP_TARGET_PARAM; 7328 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7329 != MapModifiers.end()) 7330 Bits |= OMP_MAP_ALWAYS; 7331 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7332 != MapModifiers.end()) 7333 Bits |= OMP_MAP_CLOSE; 7334 return Bits; 7335 } 7336 7337 /// Return true if the provided expression is a final array section. A 7338 /// final array section, is one whose length can't be proved to be one. 7339 bool isFinalArraySectionExpression(const Expr *E) const { 7340 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7341 7342 // It is not an array section and therefore not a unity-size one. 7343 if (!OASE) 7344 return false; 7345 7346 // An array section with no colon always refer to a single element. 7347 if (OASE->getColonLoc().isInvalid()) 7348 return false; 7349 7350 const Expr *Length = OASE->getLength(); 7351 7352 // If we don't have a length we have to check if the array has size 1 7353 // for this dimension. Also, we should always expect a length if the 7354 // base type is pointer. 
7355 if (!Length) { 7356 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7357 OASE->getBase()->IgnoreParenImpCasts()) 7358 .getCanonicalType(); 7359 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7360 return ATy->getSize().getSExtValue() != 1; 7361 // If we don't have a constant dimension length, we have to consider 7362 // the current section as having any size, so it is not necessarily 7363 // unitary. If it happen to be unity size, that's user fault. 7364 return true; 7365 } 7366 7367 // Check if the length evaluates to 1. 7368 Expr::EvalResult Result; 7369 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7370 return true; // Can have more that size 1. 7371 7372 llvm::APSInt ConstLength = Result.Val.getInt(); 7373 return ConstLength.getSExtValue() != 1; 7374 } 7375 7376 /// Generate the base pointers, section pointers, sizes and map type 7377 /// bits for the provided map type, map modifier, and expression components. 7378 /// \a IsFirstComponent should be set to true if the provided set of 7379 /// components is the first associated with a capture. 7380 void generateInfoForComponentList( 7381 OpenMPMapClauseKind MapType, 7382 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7383 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7384 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7385 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7386 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7387 bool IsImplicit, 7388 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7389 OverlappedElements = llvm::None) const { 7390 // The following summarizes what has to be generated for each map and the 7391 // types below. The generated information is expressed in this order: 7392 // base pointer, section pointer, size, flags 7393 // (to add to the ones that come from the map type and modifier). 
7394 // 7395 // double d; 7396 // int i[100]; 7397 // float *p; 7398 // 7399 // struct S1 { 7400 // int i; 7401 // float f[50]; 7402 // } 7403 // struct S2 { 7404 // int i; 7405 // float f[50]; 7406 // S1 s; 7407 // double *p; 7408 // struct S2 *ps; 7409 // } 7410 // S2 s; 7411 // S2 *ps; 7412 // 7413 // map(d) 7414 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7415 // 7416 // map(i) 7417 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7418 // 7419 // map(i[1:23]) 7420 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7421 // 7422 // map(p) 7423 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7424 // 7425 // map(p[1:24]) 7426 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7427 // 7428 // map(s) 7429 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7430 // 7431 // map(s.i) 7432 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7433 // 7434 // map(s.s.f) 7435 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7436 // 7437 // map(s.p) 7438 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7439 // 7440 // map(to: s.p[:22]) 7441 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7442 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7443 // &(s.p), &(s.p[0]), 22*sizeof(double), 7444 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7445 // (*) alloc space for struct members, only this is a target parameter 7446 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7447 // optimizes this entry out, same in the examples below) 7448 // (***) map the pointee (map: to) 7449 // 7450 // map(s.ps) 7451 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7452 // 7453 // map(from: s.ps->s.i) 7454 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7455 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7456 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7457 // 7458 // map(to: s.ps->ps) 7459 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7460 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7461 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7462 // 7463 // map(s.ps->ps->ps) 7464 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7465 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7466 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7467 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7468 // 7469 // map(to: s.ps->ps->s.f[:22]) 7470 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7471 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7472 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7473 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7474 // 7475 // map(ps) 7476 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7477 // 7478 // map(ps->i) 7479 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7480 // 7481 // map(ps->s.f) 7482 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7483 // 7484 // map(from: ps->p) 7485 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7486 // 7487 // map(to: ps->p[:22]) 7488 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7489 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7490 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7491 // 7492 // map(ps->ps) 7493 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7494 // 7495 // map(from: ps->ps->s.i) 7496 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7497 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7498 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7499 // 7500 // map(from: ps->ps->ps) 7501 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7502 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7503 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7504 // 7505 // map(ps->ps->ps->ps) 7506 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7507 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7508 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7509 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7510 // 7511 // map(to: ps->ps->ps->s.f[:22]) 7512 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7513 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7514 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7515 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7516 // 7517 // map(to: s.f[:22]) map(from: s.p[:33]) 7518 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7519 // sizeof(double*) (**), TARGET_PARAM 7520 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7521 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7522 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7523 // (*) allocate contiguous space needed to fit all mapped members even if 7524 // we allocate space for members not mapped (in this example, 7525 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7526 // them as well because they fall between &s.f[0] and &s.p) 7527 // 7528 // map(from: s.f[:22]) map(to: ps->p[:33]) 7529 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7530 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7531 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7532 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7533 // (*) the struct this entry pertains to is the 2nd element in the list of 7534 // arguments, hence MEMBER_OF(2) 7535 // 7536 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7537 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7538 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7539 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7540 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7541 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7542 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7543 // (*) the struct this entry pertains to is the 4th element in the list 7544 // of arguments, hence MEMBER_OF(4) 7545 7546 // Track if the map information being generated is the first for a 
capture. 7547 bool IsCaptureFirstInfo = IsFirstComponentList; 7548 // When the variable is on a declare target link or in a to clause with 7549 // unified memory, a reference is needed to hold the host/device address 7550 // of the variable. 7551 bool RequiresReference = false; 7552 7553 // Scan the components from the base to the complete expression. 7554 auto CI = Components.rbegin(); 7555 auto CE = Components.rend(); 7556 auto I = CI; 7557 7558 // Track if the map information being generated is the first for a list of 7559 // components. 7560 bool IsExpressionFirstInfo = true; 7561 Address BP = Address::invalid(); 7562 const Expr *AssocExpr = I->getAssociatedExpression(); 7563 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7564 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7565 7566 if (isa<MemberExpr>(AssocExpr)) { 7567 // The base is the 'this' pointer. The content of the pointer is going 7568 // to be the base of the field being mapped. 7569 BP = CGF.LoadCXXThisAddress(); 7570 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7571 (OASE && 7572 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7573 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7574 } else { 7575 // The base is the reference to the variable. 7576 // BP = &Var. 7577 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); 7578 if (const auto *VD = 7579 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7580 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7581 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7582 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7583 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7584 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7585 RequiresReference = true; 7586 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7587 } 7588 } 7589 } 7590 7591 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7592 // the last component), the base has to be the pointer itself, not its 7593 // reference. References are ignored for mapping purposes. 7594 QualType Ty = 7595 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7596 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7597 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7598 7599 // We do not need to generate individual map information for the 7600 // pointer, it can be associated with the combined storage. 7601 ++I; 7602 } 7603 } 7604 7605 // Track whether a component of the list should be marked as MEMBER_OF some 7606 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7607 // in a component list should be marked as MEMBER_OF, all subsequent entries 7608 // do not belong to the base struct. E.g. 7609 // struct S2 s; 7610 // s.ps->ps->ps->f[:] 7611 // (1) (2) (3) (4) 7612 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7613 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7614 // is the pointee of ps(2) which is not member of struct s, so it should not 7615 // be marked as such (it is still PTR_AND_OBJ). 7616 // The variable is initialized to false so that PTR_AND_OBJ entries which 7617 // are not struct members are not considered (e.g. array of pointers to 7618 // data). 7619 bool ShouldBeMemberOf = false; 7620 7621 // Variable keeping track of whether or not we have encountered a component 7622 // in the component list which is a member expression. Useful when we have a 7623 // pointer or a final array section, in which case it is the previous 7624 // component in the list which tells us whether we have a member expression. 7625 // E.g. X.f[:] 7626 // While processing the final array section "[:]" it is "f" which tells us 7627 // whether we are dealing with a member of a declared struct. 
7628 const MemberExpr *EncounteredME = nullptr; 7629 7630 for (; I != CE; ++I) { 7631 // If the current component is member of a struct (parent struct) mark it. 7632 if (!EncounteredME) { 7633 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7634 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7635 // as MEMBER_OF the parent struct. 7636 if (EncounteredME) 7637 ShouldBeMemberOf = true; 7638 } 7639 7640 auto Next = std::next(I); 7641 7642 // We need to generate the addresses and sizes if this is the last 7643 // component, if the component is a pointer or if it is an array section 7644 // whose length can't be proved to be one. If this is a pointer, it 7645 // becomes the base address for the following components. 7646 7647 // A final array section, is one whose length can't be proved to be one. 7648 bool IsFinalArraySection = 7649 isFinalArraySectionExpression(I->getAssociatedExpression()); 7650 7651 // Get information on whether the element is a pointer. Have to do a 7652 // special treatment for array sections given that they are built-in 7653 // types. 7654 const auto *OASE = 7655 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7656 bool IsPointer = 7657 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7658 .getCanonicalType() 7659 ->isAnyPointerType()) || 7660 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7661 7662 if (Next == CE || IsPointer || IsFinalArraySection) { 7663 // If this is not the last component, we expect the pointer to be 7664 // associated with an array expression or member expression. 
7665 assert((Next == CE || 7666 isa<MemberExpr>(Next->getAssociatedExpression()) || 7667 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7668 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7669 "Unexpected expression"); 7670 7671 Address LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) 7672 .getAddress(CGF); 7673 7674 // If this component is a pointer inside the base struct then we don't 7675 // need to create any entry for it - it will be combined with the object 7676 // it is pointing to into a single PTR_AND_OBJ entry. 7677 bool IsMemberPointer = 7678 IsPointer && EncounteredME && 7679 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7680 EncounteredME); 7681 if (!OverlappedElements.empty()) { 7682 // Handle base element with the info for overlapped elements. 7683 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7684 assert(Next == CE && 7685 "Expected last element for the overlapped elements."); 7686 assert(!IsPointer && 7687 "Unexpected base element with the pointer type."); 7688 // Mark the whole struct as the struct that requires allocation on the 7689 // device. 7690 PartialStruct.LowestElem = {0, LB}; 7691 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7692 I->getAssociatedExpression()->getType()); 7693 Address HB = CGF.Builder.CreateConstGEP( 7694 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7695 CGF.VoidPtrTy), 7696 TypeSize.getQuantity() - 1); 7697 PartialStruct.HighestElem = { 7698 std::numeric_limits<decltype( 7699 PartialStruct.HighestElem.first)>::max(), 7700 HB}; 7701 PartialStruct.Base = BP; 7702 // Emit data for non-overlapped data. 7703 OpenMPOffloadMappingFlags Flags = 7704 OMP_MAP_MEMBER_OF | 7705 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7706 /*AddPtrFlag=*/false, 7707 /*AddIsTargetParamFlag=*/false); 7708 LB = BP; 7709 llvm::Value *Size = nullptr; 7710 // Do bitcopy of all non-overlapped structure elements. 
7711 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7712 Component : OverlappedElements) { 7713 Address ComponentLB = Address::invalid(); 7714 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7715 Component) { 7716 if (MC.getAssociatedDeclaration()) { 7717 ComponentLB = 7718 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7719 .getAddress(CGF); 7720 Size = CGF.Builder.CreatePtrDiff( 7721 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7722 CGF.EmitCastToVoidPtr(LB.getPointer())); 7723 break; 7724 } 7725 } 7726 BasePointers.push_back(BP.getPointer()); 7727 Pointers.push_back(LB.getPointer()); 7728 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7729 /*isSigned=*/true)); 7730 Types.push_back(Flags); 7731 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7732 } 7733 BasePointers.push_back(BP.getPointer()); 7734 Pointers.push_back(LB.getPointer()); 7735 Size = CGF.Builder.CreatePtrDiff( 7736 CGF.EmitCastToVoidPtr( 7737 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7738 CGF.EmitCastToVoidPtr(LB.getPointer())); 7739 Sizes.push_back( 7740 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7741 Types.push_back(Flags); 7742 break; 7743 } 7744 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7745 if (!IsMemberPointer) { 7746 BasePointers.push_back(BP.getPointer()); 7747 Pointers.push_back(LB.getPointer()); 7748 Sizes.push_back( 7749 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7750 7751 // We need to add a pointer flag for each map that comes from the 7752 // same expression except for the first one. We also need to signal 7753 // this map is the first one that relates with the current capture 7754 // (there is a set of entries for each capture). 
7755 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7756 MapType, MapModifiers, IsImplicit, 7757 !IsExpressionFirstInfo || RequiresReference, 7758 IsCaptureFirstInfo && !RequiresReference); 7759 7760 if (!IsExpressionFirstInfo) { 7761 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7762 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7763 if (IsPointer) 7764 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7765 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7766 7767 if (ShouldBeMemberOf) { 7768 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7769 // should be later updated with the correct value of MEMBER_OF. 7770 Flags |= OMP_MAP_MEMBER_OF; 7771 // From now on, all subsequent PTR_AND_OBJ entries should not be 7772 // marked as MEMBER_OF. 7773 ShouldBeMemberOf = false; 7774 } 7775 } 7776 7777 Types.push_back(Flags); 7778 } 7779 7780 // If we have encountered a member expression so far, keep track of the 7781 // mapped member. If the parent is "*this", then the value declaration 7782 // is nullptr. 7783 if (EncounteredME) { 7784 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7785 unsigned FieldIndex = FD->getFieldIndex(); 7786 7787 // Update info about the lowest and highest elements for this struct 7788 if (!PartialStruct.Base.isValid()) { 7789 PartialStruct.LowestElem = {FieldIndex, LB}; 7790 PartialStruct.HighestElem = {FieldIndex, LB}; 7791 PartialStruct.Base = BP; 7792 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7793 PartialStruct.LowestElem = {FieldIndex, LB}; 7794 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7795 PartialStruct.HighestElem = {FieldIndex, LB}; 7796 } 7797 } 7798 7799 // If we have a final array section, we are done with this expression. 7800 if (IsFinalArraySection) 7801 break; 7802 7803 // The pointer becomes the base for the next element. 
7804 if (Next != CE) 7805 BP = LB; 7806 7807 IsExpressionFirstInfo = false; 7808 IsCaptureFirstInfo = false; 7809 } 7810 } 7811 } 7812 7813 /// Return the adjusted map modifiers if the declaration a capture refers to 7814 /// appears in a first-private clause. This is expected to be used only with 7815 /// directives that start with 'target'. 7816 MappableExprsHandler::OpenMPOffloadMappingFlags 7817 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7818 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7819 7820 // A first private variable captured by reference will use only the 7821 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7822 // declaration is known as first-private in this handler. 7823 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7824 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7825 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7826 return MappableExprsHandler::OMP_MAP_ALWAYS | 7827 MappableExprsHandler::OMP_MAP_TO; 7828 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7829 return MappableExprsHandler::OMP_MAP_TO | 7830 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7831 return MappableExprsHandler::OMP_MAP_PRIVATE | 7832 MappableExprsHandler::OMP_MAP_TO; 7833 } 7834 return MappableExprsHandler::OMP_MAP_TO | 7835 MappableExprsHandler::OMP_MAP_FROM; 7836 } 7837 7838 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7839 // Rotate by getFlagMemberOffset() bits. 7840 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7841 << getFlagMemberOffset()); 7842 } 7843 7844 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7845 OpenMPOffloadMappingFlags MemberOfFlag) { 7846 // If the entry is PTR_AND_OBJ but has not been marked with the special 7847 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7848 // marked as MEMBER_OF. 
7849 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7850 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7851 return; 7852 7853 // Reset the placeholder value to prepare the flag for the assignment of the 7854 // proper MEMBER_OF value. 7855 Flags &= ~OMP_MAP_MEMBER_OF; 7856 Flags |= MemberOfFlag; 7857 } 7858 7859 void getPlainLayout(const CXXRecordDecl *RD, 7860 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7861 bool AsBase) const { 7862 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7863 7864 llvm::StructType *St = 7865 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7866 7867 unsigned NumElements = St->getNumElements(); 7868 llvm::SmallVector< 7869 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7870 RecordLayout(NumElements); 7871 7872 // Fill bases. 7873 for (const auto &I : RD->bases()) { 7874 if (I.isVirtual()) 7875 continue; 7876 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7877 // Ignore empty bases. 7878 if (Base->isEmpty() || CGF.getContext() 7879 .getASTRecordLayout(Base) 7880 .getNonVirtualSize() 7881 .isZero()) 7882 continue; 7883 7884 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7885 RecordLayout[FieldIndex] = Base; 7886 } 7887 // Fill in virtual bases. 7888 for (const auto &I : RD->vbases()) { 7889 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7890 // Ignore empty bases. 7891 if (Base->isEmpty()) 7892 continue; 7893 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7894 if (RecordLayout[FieldIndex]) 7895 continue; 7896 RecordLayout[FieldIndex] = Base; 7897 } 7898 // Fill in all the fields. 7899 assert(!RD->isUnion() && "Unexpected union."); 7900 for (const auto *Field : RD->fields()) { 7901 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7902 // will fill in later.) 
7903 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7904 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7905 RecordLayout[FieldIndex] = Field; 7906 } 7907 } 7908 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7909 &Data : RecordLayout) { 7910 if (Data.isNull()) 7911 continue; 7912 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7913 getPlainLayout(Base, Layout, /*AsBase=*/true); 7914 else 7915 Layout.push_back(Data.get<const FieldDecl *>()); 7916 } 7917 } 7918 7919 public: 7920 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7921 : CurDir(&Dir), CGF(CGF) { 7922 // Extract firstprivate clause information. 7923 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7924 for (const auto *D : C->varlists()) 7925 FirstPrivateDecls.try_emplace( 7926 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7927 // Extract device pointer clause information. 7928 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7929 for (auto L : C->component_lists()) 7930 DevPointersMap[L.first].push_back(L.second); 7931 } 7932 7933 /// Constructor for the declare mapper directive. 7934 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7935 : CurDir(&Dir), CGF(CGF) {} 7936 7937 /// Generate code for the combined entry if we have a partially mapped struct 7938 /// and take care of the mapping flags of the arguments corresponding to 7939 /// individual struct members. 
7940 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7941 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7942 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7943 const StructRangeInfoTy &PartialStruct) const { 7944 // Base is the base of the struct 7945 BasePointers.push_back(PartialStruct.Base.getPointer()); 7946 // Pointer is the address of the lowest element 7947 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7948 Pointers.push_back(LB); 7949 // Size is (addr of {highest+1} element) - (addr of lowest element) 7950 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7951 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7952 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7953 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7954 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7955 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7956 /*isSigned=*/false); 7957 Sizes.push_back(Size); 7958 // Map type is always TARGET_PARAM 7959 Types.push_back(OMP_MAP_TARGET_PARAM); 7960 // Remove TARGET_PARAM flag from the first element 7961 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7962 7963 // All other current entries will be MEMBER_OF the combined entry 7964 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7965 // 0xFFFF in the MEMBER_OF field). 7966 OpenMPOffloadMappingFlags MemberOfFlag = 7967 getMemberOfFlag(BasePointers.size() - 1); 7968 for (auto &M : CurTypes) 7969 setCorrectMemberOfFlag(M, MemberOfFlag); 7970 } 7971 7972 /// Generate all the base pointers, section pointers, sizes and map 7973 /// types for the extracted mappable expressions. Also, for each item that 7974 /// relates with a device pointer, a pair of the relevant declaration and 7975 /// index where it occurs is appended to the device pointers info array. 
7976 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7977 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7978 MapFlagsArrayTy &Types) const { 7979 // We have to process the component lists that relate with the same 7980 // declaration in a single chunk so that we can generate the map flags 7981 // correctly. Therefore, we organize all lists in a map. 7982 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7983 7984 // Helper function to fill the information map for the different supported 7985 // clauses. 7986 auto &&InfoGen = [&Info]( 7987 const ValueDecl *D, 7988 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7989 OpenMPMapClauseKind MapType, 7990 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7991 bool ReturnDevicePointer, bool IsImplicit) { 7992 const ValueDecl *VD = 7993 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7994 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7995 IsImplicit); 7996 }; 7997 7998 assert(CurDir.is<const OMPExecutableDirective *>() && 7999 "Expect a executable directive"); 8000 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8001 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 8002 for (const auto L : C->component_lists()) { 8003 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 8004 /*ReturnDevicePointer=*/false, C->isImplicit()); 8005 } 8006 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 8007 for (const auto L : C->component_lists()) { 8008 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 8009 /*ReturnDevicePointer=*/false, C->isImplicit()); 8010 } 8011 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 8012 for (const auto L : C->component_lists()) { 8013 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 8014 /*ReturnDevicePointer=*/false, C->isImplicit()); 8015 } 8016 8017 // Look at the use_device_ptr clause information and mark the existing map 8018 // 
entries as such. If there is no map information for an entry in the 8019 // use_device_ptr list, we create one with map type 'alloc' and zero size 8020 // section. It is the user fault if that was not mapped before. If there is 8021 // no map information and the pointer is a struct member, then we defer the 8022 // emission of that entry until the whole struct has been processed. 8023 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 8024 DeferredInfo; 8025 8026 for (const auto *C : 8027 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 8028 for (const auto L : C->component_lists()) { 8029 assert(!L.second.empty() && "Not expecting empty list of components!"); 8030 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 8031 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 8032 const Expr *IE = L.second.back().getAssociatedExpression(); 8033 // If the first component is a member expression, we have to look into 8034 // 'this', which maps to null in the map of map information. Otherwise 8035 // look directly for the information. 8036 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8037 8038 // We potentially have map information for this declaration already. 8039 // Look for the first set of components that refer to it. 8040 if (It != Info.end()) { 8041 auto CI = std::find_if( 8042 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 8043 return MI.Components.back().getAssociatedDeclaration() == VD; 8044 }); 8045 // If we found a map entry, signal that the pointer has to be returned 8046 // and move on to the next declaration. 8047 if (CI != It->second.end()) { 8048 CI->ReturnDevicePointer = true; 8049 continue; 8050 } 8051 } 8052 8053 // We didn't find any match in our map information - generate a zero 8054 // size array section - if the pointer is a struct member we defer this 8055 // action until the whole struct has been processed. 
8056 if (isa<MemberExpr>(IE)) { 8057 // Insert the pointer into Info to be processed by 8058 // generateInfoForComponentList. Because it is a member pointer 8059 // without a pointee, no entry will be generated for it, therefore 8060 // we need to generate one after the whole struct has been processed. 8061 // Nonetheless, generateInfoForComponentList must be called to take 8062 // the pointer into account for the calculation of the range of the 8063 // partial struct. 8064 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8065 /*ReturnDevicePointer=*/false, C->isImplicit()); 8066 DeferredInfo[nullptr].emplace_back(IE, VD); 8067 } else { 8068 llvm::Value *Ptr = 8069 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8070 BasePointers.emplace_back(Ptr, VD); 8071 Pointers.push_back(Ptr); 8072 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8073 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8074 } 8075 } 8076 } 8077 8078 for (const auto &M : Info) { 8079 // We need to know when we generate information for the first component 8080 // associated with a capture, because the mapping flags depend on it. 8081 bool IsFirstComponentList = true; 8082 8083 // Temporary versions of arrays 8084 MapBaseValuesArrayTy CurBasePointers; 8085 MapValuesArrayTy CurPointers; 8086 MapValuesArrayTy CurSizes; 8087 MapFlagsArrayTy CurTypes; 8088 StructRangeInfoTy PartialStruct; 8089 8090 for (const MapInfo &L : M.second) { 8091 assert(!L.Components.empty() && 8092 "Not expecting declaration with no component lists."); 8093 8094 // Remember the current base pointer index. 8095 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8096 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8097 CurBasePointers, CurPointers, CurSizes, 8098 CurTypes, PartialStruct, 8099 IsFirstComponentList, L.IsImplicit); 8100 8101 // If this entry relates with a device pointer, set the relevant 8102 // declaration and add the 'return pointer' flag. 
8103 if (L.ReturnDevicePointer) { 8104 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8105 "Unexpected number of mapped base pointers."); 8106 8107 const ValueDecl *RelevantVD = 8108 L.Components.back().getAssociatedDeclaration(); 8109 assert(RelevantVD && 8110 "No relevant declaration related with device pointer??"); 8111 8112 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8113 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8114 } 8115 IsFirstComponentList = false; 8116 } 8117 8118 // Append any pending zero-length pointers which are struct members and 8119 // used with use_device_ptr. 8120 auto CI = DeferredInfo.find(M.first); 8121 if (CI != DeferredInfo.end()) { 8122 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8123 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); 8124 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8125 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8126 CurBasePointers.emplace_back(BasePtr, L.VD); 8127 CurPointers.push_back(Ptr); 8128 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8129 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8130 // value MEMBER_OF=FFFF so that the entry is later updated with the 8131 // correct value of MEMBER_OF. 8132 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8133 OMP_MAP_MEMBER_OF); 8134 } 8135 } 8136 8137 // If there is an entry in PartialStruct it means we have a struct with 8138 // individual members mapped. Emit an extra combined entry. 8139 if (PartialStruct.Base.isValid()) 8140 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8141 PartialStruct); 8142 8143 // We need to append the results of this capture to what we already have. 
8144 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8145 Pointers.append(CurPointers.begin(), CurPointers.end()); 8146 Sizes.append(CurSizes.begin(), CurSizes.end()); 8147 Types.append(CurTypes.begin(), CurTypes.end()); 8148 } 8149 } 8150 8151 /// Generate all the base pointers, section pointers, sizes and map types for 8152 /// the extracted map clauses of user-defined mapper. 8153 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8154 MapValuesArrayTy &Pointers, 8155 MapValuesArrayTy &Sizes, 8156 MapFlagsArrayTy &Types) const { 8157 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8158 "Expect a declare mapper directive"); 8159 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8160 // We have to process the component lists that relate with the same 8161 // declaration in a single chunk so that we can generate the map flags 8162 // correctly. Therefore, we organize all lists in a map. 8163 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8164 8165 // Helper function to fill the information map for the different supported 8166 // clauses. 8167 auto &&InfoGen = [&Info]( 8168 const ValueDecl *D, 8169 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8170 OpenMPMapClauseKind MapType, 8171 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8172 bool ReturnDevicePointer, bool IsImplicit) { 8173 const ValueDecl *VD = 8174 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8175 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8176 IsImplicit); 8177 }; 8178 8179 for (const auto *C : CurMapperDir->clauselists()) { 8180 const auto *MC = cast<OMPMapClause>(C); 8181 for (const auto L : MC->component_lists()) { 8182 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8183 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8184 } 8185 } 8186 8187 for (const auto &M : Info) { 8188 // We need to know when we generate information for the first component 8189 // associated with a capture, because the mapping flags depend on it. 8190 bool IsFirstComponentList = true; 8191 8192 // Temporary versions of arrays 8193 MapBaseValuesArrayTy CurBasePointers; 8194 MapValuesArrayTy CurPointers; 8195 MapValuesArrayTy CurSizes; 8196 MapFlagsArrayTy CurTypes; 8197 StructRangeInfoTy PartialStruct; 8198 8199 for (const MapInfo &L : M.second) { 8200 assert(!L.Components.empty() && 8201 "Not expecting declaration with no component lists."); 8202 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8203 CurBasePointers, CurPointers, CurSizes, 8204 CurTypes, PartialStruct, 8205 IsFirstComponentList, L.IsImplicit); 8206 IsFirstComponentList = false; 8207 } 8208 8209 // If there is an entry in PartialStruct it means we have a struct with 8210 // individual members mapped. Emit an extra combined entry. 8211 if (PartialStruct.Base.isValid()) 8212 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8213 PartialStruct); 8214 8215 // We need to append the results of this capture to what we already have. 8216 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8217 Pointers.append(CurPointers.begin(), CurPointers.end()); 8218 Sizes.append(CurSizes.begin(), CurSizes.end()); 8219 Types.append(CurTypes.begin(), CurTypes.end()); 8220 } 8221 } 8222 8223 /// Emit capture info for lambdas for variables captured by reference. 
8224 void generateInfoForLambdaCaptures( 8225 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8226 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8227 MapFlagsArrayTy &Types, 8228 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8229 const auto *RD = VD->getType() 8230 .getCanonicalType() 8231 .getNonReferenceType() 8232 ->getAsCXXRecordDecl(); 8233 if (!RD || !RD->isLambda()) 8234 return; 8235 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8236 LValue VDLVal = CGF.MakeAddrLValue( 8237 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8238 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8239 FieldDecl *ThisCapture = nullptr; 8240 RD->getCaptureFields(Captures, ThisCapture); 8241 if (ThisCapture) { 8242 LValue ThisLVal = 8243 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8244 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8245 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF), 8246 VDLVal.getPointer(CGF)); 8247 BasePointers.push_back(ThisLVal.getPointer(CGF)); 8248 Pointers.push_back(ThisLValVal.getPointer(CGF)); 8249 Sizes.push_back( 8250 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8251 CGF.Int64Ty, /*isSigned=*/true)); 8252 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8253 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8254 } 8255 for (const LambdaCapture &LC : RD->captures()) { 8256 if (!LC.capturesVariable()) 8257 continue; 8258 const VarDecl *VD = LC.getCapturedVar(); 8259 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8260 continue; 8261 auto It = Captures.find(VD); 8262 assert(It != Captures.end() && "Found lambda capture without field."); 8263 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8264 if (LC.getCaptureKind() == LCK_ByRef) { 8265 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8266 
LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8267 VDLVal.getPointer(CGF)); 8268 BasePointers.push_back(VarLVal.getPointer(CGF)); 8269 Pointers.push_back(VarLValVal.getPointer(CGF)); 8270 Sizes.push_back(CGF.Builder.CreateIntCast( 8271 CGF.getTypeSize( 8272 VD->getType().getCanonicalType().getNonReferenceType()), 8273 CGF.Int64Ty, /*isSigned=*/true)); 8274 } else { 8275 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8276 LambdaPointers.try_emplace(VarLVal.getPointer(CGF), 8277 VDLVal.getPointer(CGF)); 8278 BasePointers.push_back(VarLVal.getPointer(CGF)); 8279 Pointers.push_back(VarRVal.getScalarVal()); 8280 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8281 } 8282 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8283 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8284 } 8285 } 8286 8287 /// Set correct indices for lambdas captures. 8288 void adjustMemberOfForLambdaCaptures( 8289 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8290 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8291 MapFlagsArrayTy &Types) const { 8292 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8293 // Set correct member_of idx for all implicit lambda captures. 8294 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8295 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8296 continue; 8297 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8298 assert(BasePtr && "Unable to find base lambda address."); 8299 int TgtIdx = -1; 8300 for (unsigned J = I; J > 0; --J) { 8301 unsigned Idx = J - 1; 8302 if (Pointers[Idx] != BasePtr) 8303 continue; 8304 TgtIdx = Idx; 8305 break; 8306 } 8307 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8308 // All other current entries will be MEMBER_OF the combined entry 8309 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8310 // 0xFFFF in the MEMBER_OF field). 
8311 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8312 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8313 } 8314 } 8315 8316 /// Generate the base pointers, section pointers, sizes and map types 8317 /// associated to a given capture. 8318 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8319 llvm::Value *Arg, 8320 MapBaseValuesArrayTy &BasePointers, 8321 MapValuesArrayTy &Pointers, 8322 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8323 StructRangeInfoTy &PartialStruct) const { 8324 assert(!Cap->capturesVariableArrayType() && 8325 "Not expecting to generate map info for a variable array type!"); 8326 8327 // We need to know when we generating information for the first component 8328 const ValueDecl *VD = Cap->capturesThis() 8329 ? nullptr 8330 : Cap->getCapturedVar()->getCanonicalDecl(); 8331 8332 // If this declaration appears in a is_device_ptr clause we just have to 8333 // pass the pointer by value. If it is a reference to a declaration, we just 8334 // pass its value. 
8335 if (DevPointersMap.count(VD)) { 8336 BasePointers.emplace_back(Arg, VD); 8337 Pointers.push_back(Arg); 8338 Sizes.push_back( 8339 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8340 CGF.Int64Ty, /*isSigned=*/true)); 8341 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8342 return; 8343 } 8344 8345 using MapData = 8346 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8347 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8348 SmallVector<MapData, 4> DeclComponentLists; 8349 assert(CurDir.is<const OMPExecutableDirective *>() && 8350 "Expect a executable directive"); 8351 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8352 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8353 for (const auto L : C->decl_component_lists(VD)) { 8354 assert(L.first == VD && 8355 "We got information for the wrong declaration??"); 8356 assert(!L.second.empty() && 8357 "Not expecting declaration with no component lists."); 8358 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8359 C->getMapTypeModifiers(), 8360 C->isImplicit()); 8361 } 8362 } 8363 8364 // Find overlapping elements (including the offset from the base element). 
8365 llvm::SmallDenseMap< 8366 const MapData *, 8367 llvm::SmallVector< 8368 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8369 4> 8370 OverlappedData; 8371 size_t Count = 0; 8372 for (const MapData &L : DeclComponentLists) { 8373 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8374 OpenMPMapClauseKind MapType; 8375 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8376 bool IsImplicit; 8377 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8378 ++Count; 8379 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8380 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8381 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8382 auto CI = Components.rbegin(); 8383 auto CE = Components.rend(); 8384 auto SI = Components1.rbegin(); 8385 auto SE = Components1.rend(); 8386 for (; CI != CE && SI != SE; ++CI, ++SI) { 8387 if (CI->getAssociatedExpression()->getStmtClass() != 8388 SI->getAssociatedExpression()->getStmtClass()) 8389 break; 8390 // Are we dealing with different variables/fields? 8391 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8392 break; 8393 } 8394 // Found overlapping if, at least for one component, reached the head of 8395 // the components list. 8396 if (CI == CE || SI == SE) { 8397 assert((CI != CE || SI != SE) && 8398 "Unexpected full match of the mapping components."); 8399 const MapData &BaseData = CI == CE ? L : L1; 8400 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8401 SI == SE ? Components : Components1; 8402 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8403 OverlappedElements.getSecond().push_back(SubData); 8404 } 8405 } 8406 } 8407 // Sort the overlapped elements for each item. 
8408 llvm::SmallVector<const FieldDecl *, 4> Layout; 8409 if (!OverlappedData.empty()) { 8410 if (const auto *CRD = 8411 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8412 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8413 else { 8414 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8415 Layout.append(RD->field_begin(), RD->field_end()); 8416 } 8417 } 8418 for (auto &Pair : OverlappedData) { 8419 llvm::sort( 8420 Pair.getSecond(), 8421 [&Layout]( 8422 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8423 OMPClauseMappableExprCommon::MappableExprComponentListRef 8424 Second) { 8425 auto CI = First.rbegin(); 8426 auto CE = First.rend(); 8427 auto SI = Second.rbegin(); 8428 auto SE = Second.rend(); 8429 for (; CI != CE && SI != SE; ++CI, ++SI) { 8430 if (CI->getAssociatedExpression()->getStmtClass() != 8431 SI->getAssociatedExpression()->getStmtClass()) 8432 break; 8433 // Are we dealing with different variables/fields? 8434 if (CI->getAssociatedDeclaration() != 8435 SI->getAssociatedDeclaration()) 8436 break; 8437 } 8438 8439 // Lists contain the same elements. 8440 if (CI == CE && SI == SE) 8441 return false; 8442 8443 // List with less elements is less than list with more elements. 8444 if (CI == CE || SI == SE) 8445 return CI == CE; 8446 8447 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8448 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8449 if (FD1->getParent() == FD2->getParent()) 8450 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8451 const auto It = 8452 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8453 return FD == FD1 || FD == FD2; 8454 }); 8455 return *It == FD1; 8456 }); 8457 } 8458 8459 // Associated with a capture, because the mapping flags depend on it. 8460 // Go through all of the elements with the overlapped elements. 
8461 for (const auto &Pair : OverlappedData) { 8462 const MapData &L = *Pair.getFirst(); 8463 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8464 OpenMPMapClauseKind MapType; 8465 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8466 bool IsImplicit; 8467 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8468 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8469 OverlappedComponents = Pair.getSecond(); 8470 bool IsFirstComponentList = true; 8471 generateInfoForComponentList(MapType, MapModifiers, Components, 8472 BasePointers, Pointers, Sizes, Types, 8473 PartialStruct, IsFirstComponentList, 8474 IsImplicit, OverlappedComponents); 8475 } 8476 // Go through other elements without overlapped elements. 8477 bool IsFirstComponentList = OverlappedData.empty(); 8478 for (const MapData &L : DeclComponentLists) { 8479 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8480 OpenMPMapClauseKind MapType; 8481 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8482 bool IsImplicit; 8483 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8484 auto It = OverlappedData.find(&L); 8485 if (It == OverlappedData.end()) 8486 generateInfoForComponentList(MapType, MapModifiers, Components, 8487 BasePointers, Pointers, Sizes, Types, 8488 PartialStruct, IsFirstComponentList, 8489 IsImplicit); 8490 IsFirstComponentList = false; 8491 } 8492 } 8493 8494 /// Generate the base pointers, section pointers, sizes and map types 8495 /// associated with the declare target link variables. 
8496 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8497 MapValuesArrayTy &Pointers, 8498 MapValuesArrayTy &Sizes, 8499 MapFlagsArrayTy &Types) const { 8500 assert(CurDir.is<const OMPExecutableDirective *>() && 8501 "Expect a executable directive"); 8502 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8503 // Map other list items in the map clause which are not captured variables 8504 // but "declare target link" global variables. 8505 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8506 for (const auto L : C->component_lists()) { 8507 if (!L.first) 8508 continue; 8509 const auto *VD = dyn_cast<VarDecl>(L.first); 8510 if (!VD) 8511 continue; 8512 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8513 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8514 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8515 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8516 continue; 8517 StructRangeInfoTy PartialStruct; 8518 generateInfoForComponentList( 8519 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8520 Pointers, Sizes, Types, PartialStruct, 8521 /*IsFirstComponentList=*/true, C->isImplicit()); 8522 assert(!PartialStruct.Base.isValid() && 8523 "No partial structs for declare target link expected."); 8524 } 8525 } 8526 } 8527 8528 /// Generate the default map information for a given capture \a CI, 8529 /// record field declaration \a RI and captured value \a CV. 8530 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8531 const FieldDecl &RI, llvm::Value *CV, 8532 MapBaseValuesArrayTy &CurBasePointers, 8533 MapValuesArrayTy &CurPointers, 8534 MapValuesArrayTy &CurSizes, 8535 MapFlagsArrayTy &CurMapTypes) const { 8536 bool IsImplicit = true; 8537 // Do the default mapping. 
8538 if (CI.capturesThis()) { 8539 CurBasePointers.push_back(CV); 8540 CurPointers.push_back(CV); 8541 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8542 CurSizes.push_back( 8543 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8544 CGF.Int64Ty, /*isSigned=*/true)); 8545 // Default map type. 8546 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8547 } else if (CI.capturesVariableByCopy()) { 8548 CurBasePointers.push_back(CV); 8549 CurPointers.push_back(CV); 8550 if (!RI.getType()->isAnyPointerType()) { 8551 // We have to signal to the runtime captures passed by value that are 8552 // not pointers. 8553 CurMapTypes.push_back(OMP_MAP_LITERAL); 8554 CurSizes.push_back(CGF.Builder.CreateIntCast( 8555 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8556 } else { 8557 // Pointers are implicitly mapped with a zero size and no flags 8558 // (other than first map that is added for all implicit maps). 8559 CurMapTypes.push_back(OMP_MAP_NONE); 8560 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8561 } 8562 const VarDecl *VD = CI.getCapturedVar(); 8563 auto I = FirstPrivateDecls.find(VD); 8564 if (I != FirstPrivateDecls.end()) 8565 IsImplicit = I->getSecond(); 8566 } else { 8567 assert(CI.capturesVariable() && "Expected captured reference."); 8568 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8569 QualType ElementType = PtrTy->getPointeeType(); 8570 CurSizes.push_back(CGF.Builder.CreateIntCast( 8571 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8572 // The default map type for a scalar/complex type is 'to' because by 8573 // default the value doesn't have to be retrieved. For an aggregate 8574 // type, the default is 'tofrom'. 
8575 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8576 const VarDecl *VD = CI.getCapturedVar(); 8577 auto I = FirstPrivateDecls.find(VD); 8578 if (I != FirstPrivateDecls.end() && 8579 VD->getType().isConstant(CGF.getContext())) { 8580 llvm::Constant *Addr = 8581 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8582 // Copy the value of the original variable to the new global copy. 8583 CGF.Builder.CreateMemCpy( 8584 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), 8585 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8586 CurSizes.back(), /*IsVolatile=*/false); 8587 // Use new global variable as the base pointers. 8588 CurBasePointers.push_back(Addr); 8589 CurPointers.push_back(Addr); 8590 } else { 8591 CurBasePointers.push_back(CV); 8592 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8593 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8594 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8595 AlignmentSource::Decl)); 8596 CurPointers.push_back(PtrAddr.getPointer()); 8597 } else { 8598 CurPointers.push_back(CV); 8599 } 8600 } 8601 if (I != FirstPrivateDecls.end()) 8602 IsImplicit = I->getSecond(); 8603 } 8604 // Every default map produces a single argument which is a target parameter. 8605 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8606 8607 // Add flag stating this is an implicit map. 8608 if (IsImplicit) 8609 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8610 } 8611 }; 8612 } // anonymous namespace 8613 8614 /// Emit the arrays used to pass the captures and map information to the 8615 /// offloading runtime library. If there is no map or capture information, 8616 /// return nullptr by reference. 
8617 static void 8618 emitOffloadingArrays(CodeGenFunction &CGF, 8619 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8620 MappableExprsHandler::MapValuesArrayTy &Pointers, 8621 MappableExprsHandler::MapValuesArrayTy &Sizes, 8622 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8623 CGOpenMPRuntime::TargetDataInfo &Info) { 8624 CodeGenModule &CGM = CGF.CGM; 8625 ASTContext &Ctx = CGF.getContext(); 8626 8627 // Reset the array information. 8628 Info.clearArrayInfo(); 8629 Info.NumberOfPtrs = BasePointers.size(); 8630 8631 if (Info.NumberOfPtrs) { 8632 // Detect if we have any capture size requiring runtime evaluation of the 8633 // size so that a constant array could be eventually used. 8634 bool hasRuntimeEvaluationCaptureSize = false; 8635 for (llvm::Value *S : Sizes) 8636 if (!isa<llvm::Constant>(S)) { 8637 hasRuntimeEvaluationCaptureSize = true; 8638 break; 8639 } 8640 8641 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8642 QualType PointerArrayType = Ctx.getConstantArrayType( 8643 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8644 /*IndexTypeQuals=*/0); 8645 8646 Info.BasePointersArray = 8647 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8648 Info.PointersArray = 8649 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8650 8651 // If we don't have any VLA types or other types that require runtime 8652 // evaluation, we can use a constant array for the map sizes, otherwise we 8653 // need to fill up the arrays as we do for the pointers. 
8654 QualType Int64Ty = 8655 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8656 if (hasRuntimeEvaluationCaptureSize) { 8657 QualType SizeArrayType = Ctx.getConstantArrayType( 8658 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8659 /*IndexTypeQuals=*/0); 8660 Info.SizesArray = 8661 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8662 } else { 8663 // We expect all the sizes to be constant, so we collect them to create 8664 // a constant array. 8665 SmallVector<llvm::Constant *, 16> ConstSizes; 8666 for (llvm::Value *S : Sizes) 8667 ConstSizes.push_back(cast<llvm::Constant>(S)); 8668 8669 auto *SizesArrayInit = llvm::ConstantArray::get( 8670 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8671 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8672 auto *SizesArrayGbl = new llvm::GlobalVariable( 8673 CGM.getModule(), SizesArrayInit->getType(), 8674 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8675 SizesArrayInit, Name); 8676 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8677 Info.SizesArray = SizesArrayGbl; 8678 } 8679 8680 // The map types are always constant so we don't need to generate code to 8681 // fill arrays. Instead, we create an array constant. 
8682 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8683 llvm::copy(MapTypes, Mapping.begin()); 8684 llvm::Constant *MapTypesArrayInit = 8685 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8686 std::string MaptypesName = 8687 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8688 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8689 CGM.getModule(), MapTypesArrayInit->getType(), 8690 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8691 MapTypesArrayInit, MaptypesName); 8692 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8693 Info.MapTypesArray = MapTypesArrayGbl; 8694 8695 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8696 llvm::Value *BPVal = *BasePointers[I]; 8697 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8698 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8699 Info.BasePointersArray, 0, I); 8700 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8701 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8702 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8703 CGF.Builder.CreateStore(BPVal, BPAddr); 8704 8705 if (Info.requiresDevicePointerInfo()) 8706 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8707 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8708 8709 llvm::Value *PVal = Pointers[I]; 8710 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8711 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8712 Info.PointersArray, 0, I); 8713 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8714 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8715 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8716 CGF.Builder.CreateStore(PVal, PAddr); 8717 8718 if (hasRuntimeEvaluationCaptureSize) { 8719 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8720 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8721 Info.SizesArray, 8722 /*Idx0=*/0, 8723 /*Idx1=*/I); 8724 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8725 CGF.Builder.CreateStore( 8726 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8727 SAddr); 8728 } 8729 } 8730 } 8731 } 8732 8733 /// Emit the arguments to be passed to the runtime library based on the 8734 /// arrays of pointers, sizes and map types. 8735 static void emitOffloadingArraysArgument( 8736 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8737 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8738 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8739 CodeGenModule &CGM = CGF.CGM; 8740 if (Info.NumberOfPtrs) { 8741 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8742 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8743 Info.BasePointersArray, 8744 /*Idx0=*/0, /*Idx1=*/0); 8745 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8746 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8747 Info.PointersArray, 8748 /*Idx0=*/0, 8749 /*Idx1=*/0); 8750 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8751 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8752 /*Idx0=*/0, /*Idx1=*/0); 8753 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8754 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8755 Info.MapTypesArray, 8756 /*Idx0=*/0, 8757 /*Idx1=*/0); 8758 } else { 8759 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8760 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8761 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8762 MapTypesArrayArg = 8763 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8764 } 8765 } 8766 8767 /// Check for inner distribute directive. 
8768 static const OMPExecutableDirective * 8769 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8770 const auto *CS = D.getInnermostCapturedStmt(); 8771 const auto *Body = 8772 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8773 const Stmt *ChildStmt = 8774 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8775 8776 if (const auto *NestedDir = 8777 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8778 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8779 switch (D.getDirectiveKind()) { 8780 case OMPD_target: 8781 if (isOpenMPDistributeDirective(DKind)) 8782 return NestedDir; 8783 if (DKind == OMPD_teams) { 8784 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8785 /*IgnoreCaptured=*/true); 8786 if (!Body) 8787 return nullptr; 8788 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8789 if (const auto *NND = 8790 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8791 DKind = NND->getDirectiveKind(); 8792 if (isOpenMPDistributeDirective(DKind)) 8793 return NND; 8794 } 8795 } 8796 return nullptr; 8797 case OMPD_target_teams: 8798 if (isOpenMPDistributeDirective(DKind)) 8799 return NestedDir; 8800 return nullptr; 8801 case OMPD_target_parallel: 8802 case OMPD_target_simd: 8803 case OMPD_target_parallel_for: 8804 case OMPD_target_parallel_for_simd: 8805 return nullptr; 8806 case OMPD_target_teams_distribute: 8807 case OMPD_target_teams_distribute_simd: 8808 case OMPD_target_teams_distribute_parallel_for: 8809 case OMPD_target_teams_distribute_parallel_for_simd: 8810 case OMPD_parallel: 8811 case OMPD_for: 8812 case OMPD_parallel_for: 8813 case OMPD_parallel_master: 8814 case OMPD_parallel_sections: 8815 case OMPD_for_simd: 8816 case OMPD_parallel_for_simd: 8817 case OMPD_cancel: 8818 case OMPD_cancellation_point: 8819 case OMPD_ordered: 8820 case OMPD_threadprivate: 8821 case OMPD_allocate: 8822 case OMPD_task: 8823 case OMPD_simd: 8824 case OMPD_sections: 8825 
case OMPD_section: 8826 case OMPD_single: 8827 case OMPD_master: 8828 case OMPD_critical: 8829 case OMPD_taskyield: 8830 case OMPD_barrier: 8831 case OMPD_taskwait: 8832 case OMPD_taskgroup: 8833 case OMPD_atomic: 8834 case OMPD_flush: 8835 case OMPD_teams: 8836 case OMPD_target_data: 8837 case OMPD_target_exit_data: 8838 case OMPD_target_enter_data: 8839 case OMPD_distribute: 8840 case OMPD_distribute_simd: 8841 case OMPD_distribute_parallel_for: 8842 case OMPD_distribute_parallel_for_simd: 8843 case OMPD_teams_distribute: 8844 case OMPD_teams_distribute_simd: 8845 case OMPD_teams_distribute_parallel_for: 8846 case OMPD_teams_distribute_parallel_for_simd: 8847 case OMPD_target_update: 8848 case OMPD_declare_simd: 8849 case OMPD_declare_variant: 8850 case OMPD_declare_target: 8851 case OMPD_end_declare_target: 8852 case OMPD_declare_reduction: 8853 case OMPD_declare_mapper: 8854 case OMPD_taskloop: 8855 case OMPD_taskloop_simd: 8856 case OMPD_master_taskloop: 8857 case OMPD_master_taskloop_simd: 8858 case OMPD_parallel_master_taskloop: 8859 case OMPD_parallel_master_taskloop_simd: 8860 case OMPD_requires: 8861 case OMPD_unknown: 8862 llvm_unreachable("Unexpected directive."); 8863 } 8864 } 8865 8866 return nullptr; 8867 } 8868 8869 /// Emit the user-defined mapper function. The code generation follows the 8870 /// pattern in the example below. 8871 /// \code 8872 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8873 /// void *base, void *begin, 8874 /// int64_t size, int64_t type) { 8875 /// // Allocate space for an array section first. 8876 /// if (size > 1 && !maptype.IsDelete) 8877 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8878 /// size*sizeof(Ty), clearToFrom(type)); 8879 /// // Map members. 
8880 /// for (unsigned i = 0; i < size; i++) { 8881 /// // For each component specified by this mapper: 8882 /// for (auto c : all_components) { 8883 /// if (c.hasMapper()) 8884 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8885 /// c.arg_type); 8886 /// else 8887 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8888 /// c.arg_begin, c.arg_size, c.arg_type); 8889 /// } 8890 /// } 8891 /// // Delete the array section. 8892 /// if (size > 1 && maptype.IsDelete) 8893 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8894 /// size*sizeof(Ty), clearToFrom(type)); 8895 /// } 8896 /// \endcode 8897 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8898 CodeGenFunction *CGF) { 8899 if (UDMMap.count(D) > 0) 8900 return; 8901 ASTContext &C = CGM.getContext(); 8902 QualType Ty = D->getType(); 8903 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8904 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8905 auto *MapperVarDecl = 8906 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8907 SourceLocation Loc = D->getLocation(); 8908 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8909 8910 // Prepare mapper function arguments and attributes. 
8911 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8912 C.VoidPtrTy, ImplicitParamDecl::Other); 8913 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 8914 ImplicitParamDecl::Other); 8915 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8916 C.VoidPtrTy, ImplicitParamDecl::Other); 8917 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8918 ImplicitParamDecl::Other); 8919 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8920 ImplicitParamDecl::Other); 8921 FunctionArgList Args; 8922 Args.push_back(&HandleArg); 8923 Args.push_back(&BaseArg); 8924 Args.push_back(&BeginArg); 8925 Args.push_back(&SizeArg); 8926 Args.push_back(&TypeArg); 8927 const CGFunctionInfo &FnInfo = 8928 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 8929 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 8930 SmallString<64> TyStr; 8931 llvm::raw_svector_ostream Out(TyStr); 8932 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 8933 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 8934 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 8935 Name, &CGM.getModule()); 8936 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 8937 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 8938 // Start the mapper function code generation. 8939 CodeGenFunction MapperCGF(CGM); 8940 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 8941 // Compute the starting and end addreses of array elements. 
8942 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 8943 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 8944 C.getPointerType(Int64Ty), Loc); 8945 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 8946 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 8947 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 8948 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 8949 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 8950 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 8951 C.getPointerType(Int64Ty), Loc); 8952 // Prepare common arguments for array initiation and deletion. 8953 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 8954 MapperCGF.GetAddrOfLocalVar(&HandleArg), 8955 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8956 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 8957 MapperCGF.GetAddrOfLocalVar(&BaseArg), 8958 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8959 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 8960 MapperCGF.GetAddrOfLocalVar(&BeginArg), 8961 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8962 8963 // Emit array initiation if this is an array section and \p MapType indicates 8964 // that memory allocation is required. 8965 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 8966 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 8967 ElementSize, HeadBB, /*IsInit=*/true); 8968 8969 // Emit a for loop to iterate through SizeArg of elements and map all of them. 8970 8971 // Emit the loop header block. 8972 MapperCGF.EmitBlock(HeadBB); 8973 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 8974 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 8975 // Evaluate whether the initial condition is satisfied. 
8976 llvm::Value *IsEmpty = 8977 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 8978 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 8979 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 8980 8981 // Emit the loop body block. 8982 MapperCGF.EmitBlock(BodyBB); 8983 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 8984 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 8985 PtrPHI->addIncoming(PtrBegin, EntryBB); 8986 Address PtrCurrent = 8987 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 8988 .getAlignment() 8989 .alignmentOfArrayElement(ElementSize)); 8990 // Privatize the declared variable of mapper to be the current array element. 8991 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 8992 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 8993 return MapperCGF 8994 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 8995 .getAddress(MapperCGF); 8996 }); 8997 (void)Scope.Privatize(); 8998 8999 // Get map clause information. Fill up the arrays with all mapped variables. 9000 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9001 MappableExprsHandler::MapValuesArrayTy Pointers; 9002 MappableExprsHandler::MapValuesArrayTy Sizes; 9003 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9004 MappableExprsHandler MEHandler(*D, MapperCGF); 9005 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 9006 9007 // Call the runtime API __tgt_mapper_num_components to get the number of 9008 // pre-existing components. 9009 llvm::Value *OffloadingArgs[] = {Handle}; 9010 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 9011 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 9012 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 9013 PreviousSize, 9014 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 9015 9016 // Fill up the runtime mapper handle for all components. 
9017 for (unsigned I = 0; I < BasePointers.size(); ++I) { 9018 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 9019 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9020 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 9021 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 9022 llvm::Value *CurSizeArg = Sizes[I]; 9023 9024 // Extract the MEMBER_OF field from the map type. 9025 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 9026 MapperCGF.EmitBlock(MemberBB); 9027 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 9028 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 9029 OriMapType, 9030 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 9031 llvm::BasicBlock *MemberCombineBB = 9032 MapperCGF.createBasicBlock("omp.member.combine"); 9033 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 9034 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 9035 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 9036 // Add the number of pre-existing components to the MEMBER_OF field if it 9037 // is valid. 9038 MapperCGF.EmitBlock(MemberCombineBB); 9039 llvm::Value *CombinedMember = 9040 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 9041 // Do nothing if it is not a member of previous components. 9042 MapperCGF.EmitBlock(TypeBB); 9043 llvm::PHINode *MemberMapType = 9044 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9045 MemberMapType->addIncoming(OriMapType, MemberBB); 9046 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9047 9048 // Combine the map type inherited from user-defined mapper with that 9049 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9050 // bits of the \a MapType, which is the input argument of the mapper 9051 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9052 // bits of MemberMapType. 
9053 // [OpenMP 5.0], 1.2.6. map-type decay. 9054 // | alloc | to | from | tofrom | release | delete 9055 // ---------------------------------------------------------- 9056 // alloc | alloc | alloc | alloc | alloc | release | delete 9057 // to | alloc | to | alloc | to | release | delete 9058 // from | alloc | alloc | from | from | release | delete 9059 // tofrom | alloc | to | from | tofrom | release | delete 9060 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9061 MapType, 9062 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9063 MappableExprsHandler::OMP_MAP_FROM)); 9064 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9065 llvm::BasicBlock *AllocElseBB = 9066 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9067 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9068 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9069 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9070 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9071 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9072 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9073 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9074 MapperCGF.EmitBlock(AllocBB); 9075 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9076 MemberMapType, 9077 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9078 MappableExprsHandler::OMP_MAP_FROM))); 9079 MapperCGF.Builder.CreateBr(EndBB); 9080 MapperCGF.EmitBlock(AllocElseBB); 9081 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9082 LeftToFrom, 9083 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9084 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9085 // In case of to, clear OMP_MAP_FROM. 
9086 MapperCGF.EmitBlock(ToBB); 9087 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9088 MemberMapType, 9089 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9090 MapperCGF.Builder.CreateBr(EndBB); 9091 MapperCGF.EmitBlock(ToElseBB); 9092 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9093 LeftToFrom, 9094 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9095 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9096 // In case of from, clear OMP_MAP_TO. 9097 MapperCGF.EmitBlock(FromBB); 9098 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9099 MemberMapType, 9100 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9101 // In case of tofrom, do nothing. 9102 MapperCGF.EmitBlock(EndBB); 9103 llvm::PHINode *CurMapType = 9104 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9105 CurMapType->addIncoming(AllocMapType, AllocBB); 9106 CurMapType->addIncoming(ToMapType, ToBB); 9107 CurMapType->addIncoming(FromMapType, FromBB); 9108 CurMapType->addIncoming(MemberMapType, ToElseBB); 9109 9110 // TODO: call the corresponding mapper function if a user-defined mapper is 9111 // associated with this map clause. 9112 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9113 // data structure. 9114 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9115 CurSizeArg, CurMapType}; 9116 MapperCGF.EmitRuntimeCall( 9117 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9118 OffloadingArgs); 9119 } 9120 9121 // Update the pointer to point to the next element that needs to be mapped, 9122 // and check whether we have mapped all elements. 
9123 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9124 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9125 PtrPHI->addIncoming(PtrNext, BodyBB); 9126 llvm::Value *IsDone = 9127 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9128 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9129 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9130 9131 MapperCGF.EmitBlock(ExitBB); 9132 // Emit array deletion if this is an array section and \p MapType indicates 9133 // that deletion is required. 9134 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9135 ElementSize, DoneBB, /*IsInit=*/false); 9136 9137 // Emit the function exit block. 9138 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9139 MapperCGF.FinishFunction(); 9140 UDMMap.try_emplace(D, Fn); 9141 if (CGF) { 9142 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9143 Decls.second.push_back(D); 9144 } 9145 } 9146 9147 /// Emit the array initialization or deletion portion for user-defined mapper 9148 /// code generation. First, it evaluates whether an array section is mapped and 9149 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9150 /// true, and \a MapType indicates to not delete this array, array 9151 /// initialization code is generated. If \a IsInit is false, and \a MapType 9152 /// indicates to not this array, array deletion code is generated. 9153 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9154 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9155 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9156 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9157 StringRef Prefix = IsInit ? ".init" : ".del"; 9158 9159 // Evaluate if this is an array section. 
9160 llvm::BasicBlock *IsDeleteBB = 9161 MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); 9162 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); 9163 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9164 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9165 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9166 9167 // Evaluate if we are going to delete this section. 9168 MapperCGF.EmitBlock(IsDeleteBB); 9169 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9170 MapType, 9171 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9172 llvm::Value *DeleteCond; 9173 if (IsInit) { 9174 DeleteCond = MapperCGF.Builder.CreateIsNull( 9175 DeleteBit, "omp.array" + Prefix + ".delete"); 9176 } else { 9177 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9178 DeleteBit, "omp.array" + Prefix + ".delete"); 9179 } 9180 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9181 9182 MapperCGF.EmitBlock(BodyBB); 9183 // Get the array size by multiplying element size and element number (i.e., \p 9184 // Size). 9185 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9186 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9187 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9188 // memory allocation/deletion purpose only. 9189 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9190 MapType, 9191 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9192 MappableExprsHandler::OMP_MAP_FROM))); 9193 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9194 // data structure. 
9195 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9196 MapperCGF.EmitRuntimeCall( 9197 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9198 } 9199 9200 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9201 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9202 llvm::Value *DeviceID, 9203 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9204 const OMPLoopDirective &D)> 9205 SizeEmitter) { 9206 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9207 const OMPExecutableDirective *TD = &D; 9208 // Get nested teams distribute kind directive, if any. 9209 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9210 TD = getNestedDistributeDirective(CGM.getContext(), D); 9211 if (!TD) 9212 return; 9213 const auto *LD = cast<OMPLoopDirective>(TD); 9214 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9215 PrePostActionTy &) { 9216 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9217 llvm::Value *Args[] = {DeviceID, NumIterations}; 9218 CGF.EmitRuntimeCall( 9219 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9220 } 9221 }; 9222 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9223 } 9224 9225 void CGOpenMPRuntime::emitTargetCall( 9226 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9227 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9228 const Expr *Device, 9229 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9230 const OMPLoopDirective &D)> 9231 SizeEmitter) { 9232 if (!CGF.HaveInsertPoint()) 9233 return; 9234 9235 assert(OutlinedFn && "Invalid outlined function!"); 9236 9237 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9238 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9239 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9240 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9241 PrePostActionTy &) { 9242 
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9243 }; 9244 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9245 9246 CodeGenFunction::OMPTargetDataInfo InputInfo; 9247 llvm::Value *MapTypesArray = nullptr; 9248 // Fill up the pointer arrays and transfer execution to the device. 9249 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9250 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9251 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9252 // On top of the arrays that were filled up, the target offloading call 9253 // takes as arguments the device id as well as the host pointer. The host 9254 // pointer is used by the runtime library to identify the current target 9255 // region, so it only has to be unique and not necessarily point to 9256 // anything. It could be the pointer to the outlined function that 9257 // implements the target region, but we aren't using that so that the 9258 // compiler doesn't need to keep that, and could therefore inline the host 9259 // function if proven worthwhile during optimization. 9260 9261 // From this point on, we need to have an ID of the target region defined. 9262 assert(OutlinedFnID && "Invalid outlined function ID!"); 9263 9264 // Emit device ID if any. 9265 llvm::Value *DeviceID; 9266 if (Device) { 9267 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9268 CGF.Int64Ty, /*isSigned=*/true); 9269 } else { 9270 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9271 } 9272 9273 // Emit the number of elements in the offloading arrays. 9274 llvm::Value *PointerNum = 9275 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9276 9277 // Return value of the runtime offloading call. 9278 llvm::Value *Return; 9279 9280 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9281 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9282 9283 // Emit tripcount for the target loop-based directive. 
9284 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9285 9286 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9287 // The target region is an outlined function launched by the runtime 9288 // via calls __tgt_target() or __tgt_target_teams(). 9289 // 9290 // __tgt_target() launches a target region with one team and one thread, 9291 // executing a serial region. This master thread may in turn launch 9292 // more threads within its team upon encountering a parallel region, 9293 // however, no additional teams can be launched on the device. 9294 // 9295 // __tgt_target_teams() launches a target region with one or more teams, 9296 // each with one or more threads. This call is required for target 9297 // constructs such as: 9298 // 'target teams' 9299 // 'target' / 'teams' 9300 // 'target teams distribute parallel for' 9301 // 'target parallel' 9302 // and so on. 9303 // 9304 // Note that on the host and CPU targets, the runtime implementation of 9305 // these calls simply call the outlined function without forking threads. 9306 // The outlined functions themselves have runtime calls to 9307 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9308 // the compiler in emitTeamsCall() and emitParallelCall(). 9309 // 9310 // In contrast, on the NVPTX target, the implementation of 9311 // __tgt_target_teams() launches a GPU kernel with the requested number 9312 // of teams and threads so no additional calls to the runtime are required. 9313 if (NumTeams) { 9314 // If we have NumTeams defined this means that we have an enclosed teams 9315 // region. Therefore we also expect to have NumThreads defined. These two 9316 // values should be defined in the presence of a teams directive, 9317 // regardless of having any clauses associated. If the user is using teams 9318 // but no clauses, these two values will be the default that should be 9319 // passed to the runtime library - a 32-bit integer with the value zero. 
9320 assert(NumThreads && "Thread limit expression should be available along " 9321 "with number of teams."); 9322 llvm::Value *OffloadingArgs[] = {DeviceID, 9323 OutlinedFnID, 9324 PointerNum, 9325 InputInfo.BasePointersArray.getPointer(), 9326 InputInfo.PointersArray.getPointer(), 9327 InputInfo.SizesArray.getPointer(), 9328 MapTypesArray, 9329 NumTeams, 9330 NumThreads}; 9331 Return = CGF.EmitRuntimeCall( 9332 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 9333 : OMPRTL__tgt_target_teams), 9334 OffloadingArgs); 9335 } else { 9336 llvm::Value *OffloadingArgs[] = {DeviceID, 9337 OutlinedFnID, 9338 PointerNum, 9339 InputInfo.BasePointersArray.getPointer(), 9340 InputInfo.PointersArray.getPointer(), 9341 InputInfo.SizesArray.getPointer(), 9342 MapTypesArray}; 9343 Return = CGF.EmitRuntimeCall( 9344 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait 9345 : OMPRTL__tgt_target), 9346 OffloadingArgs); 9347 } 9348 9349 // Check the error code and execute the host version if required. 9350 llvm::BasicBlock *OffloadFailedBlock = 9351 CGF.createBasicBlock("omp_offload.failed"); 9352 llvm::BasicBlock *OffloadContBlock = 9353 CGF.createBasicBlock("omp_offload.cont"); 9354 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9355 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9356 9357 CGF.EmitBlock(OffloadFailedBlock); 9358 if (RequiresOuterTask) { 9359 CapturedVars.clear(); 9360 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9361 } 9362 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9363 CGF.EmitBranch(OffloadContBlock); 9364 9365 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9366 }; 9367 9368 // Notify that the host version must be executed. 
9369 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9370 RequiresOuterTask](CodeGenFunction &CGF, 9371 PrePostActionTy &) { 9372 if (RequiresOuterTask) { 9373 CapturedVars.clear(); 9374 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9375 } 9376 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9377 }; 9378 9379 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9380 &CapturedVars, RequiresOuterTask, 9381 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9382 // Fill up the arrays with all the captured variables. 9383 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9384 MappableExprsHandler::MapValuesArrayTy Pointers; 9385 MappableExprsHandler::MapValuesArrayTy Sizes; 9386 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9387 9388 // Get mappable expression information. 9389 MappableExprsHandler MEHandler(D, CGF); 9390 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9391 9392 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9393 auto CV = CapturedVars.begin(); 9394 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9395 CE = CS.capture_end(); 9396 CI != CE; ++CI, ++RI, ++CV) { 9397 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 9398 MappableExprsHandler::MapValuesArrayTy CurPointers; 9399 MappableExprsHandler::MapValuesArrayTy CurSizes; 9400 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 9401 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9402 9403 // VLA sizes are passed to the outlined region by copy and do not have map 9404 // information associated. 9405 if (CI->capturesVariableArrayType()) { 9406 CurBasePointers.push_back(*CV); 9407 CurPointers.push_back(*CV); 9408 CurSizes.push_back(CGF.Builder.CreateIntCast( 9409 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9410 // Copy to the device as an argument. No need to retrieve it. 
9411 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9412 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9413 MappableExprsHandler::OMP_MAP_IMPLICIT); 9414 } else { 9415 // If we have any information in the map clause, we use it, otherwise we 9416 // just do a default mapping. 9417 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 9418 CurSizes, CurMapTypes, PartialStruct); 9419 if (CurBasePointers.empty()) 9420 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 9421 CurPointers, CurSizes, CurMapTypes); 9422 // Generate correct mapping for variables captured by reference in 9423 // lambdas. 9424 if (CI->capturesVariable()) 9425 MEHandler.generateInfoForLambdaCaptures( 9426 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 9427 CurMapTypes, LambdaPointers); 9428 } 9429 // We expect to have at least an element of information for this capture. 9430 assert(!CurBasePointers.empty() && 9431 "Non-existing map pointer for capture!"); 9432 assert(CurBasePointers.size() == CurPointers.size() && 9433 CurBasePointers.size() == CurSizes.size() && 9434 CurBasePointers.size() == CurMapTypes.size() && 9435 "Inconsistent map information sizes!"); 9436 9437 // If there is an entry in PartialStruct it means we have a struct with 9438 // individual members mapped. Emit an extra combined entry. 9439 if (PartialStruct.Base.isValid()) 9440 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 9441 CurMapTypes, PartialStruct); 9442 9443 // We need to append the results of this capture to what we already have. 9444 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 9445 Pointers.append(CurPointers.begin(), CurPointers.end()); 9446 Sizes.append(CurSizes.begin(), CurSizes.end()); 9447 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 9448 } 9449 // Adjust MEMBER_OF flags for the lambdas captures. 
9450 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 9451 Pointers, MapTypes); 9452 // Map other list items in the map clause which are not captured variables 9453 // but "declare target link" global variables. 9454 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 9455 MapTypes); 9456 9457 TargetDataInfo Info; 9458 // Fill up the arrays and create the arguments. 9459 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9460 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9461 Info.PointersArray, Info.SizesArray, 9462 Info.MapTypesArray, Info); 9463 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9464 InputInfo.BasePointersArray = 9465 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9466 InputInfo.PointersArray = 9467 Address(Info.PointersArray, CGM.getPointerAlign()); 9468 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9469 MapTypesArray = Info.MapTypesArray; 9470 if (RequiresOuterTask) 9471 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9472 else 9473 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9474 }; 9475 9476 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9477 CodeGenFunction &CGF, PrePostActionTy &) { 9478 if (RequiresOuterTask) { 9479 CodeGenFunction::OMPTargetDataInfo InputInfo; 9480 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9481 } else { 9482 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9483 } 9484 }; 9485 9486 // If we have a target function ID it means that we need to support 9487 // offloading, otherwise, just execute on the host. We need to execute on host 9488 // regardless of the conditional in the if clause if, e.g., the user do not 9489 // specify target triples. 
9490 if (OutlinedFnID) { 9491 if (IfCond) { 9492 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9493 } else { 9494 RegionCodeGenTy ThenRCG(TargetThenGen); 9495 ThenRCG(CGF); 9496 } 9497 } else { 9498 RegionCodeGenTy ElseRCG(TargetElseGen); 9499 ElseRCG(CGF); 9500 } 9501 } 9502 9503 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9504 StringRef ParentName) { 9505 if (!S) 9506 return; 9507 9508 // Codegen OMP target directives that offload compute to the device. 9509 bool RequiresDeviceCodegen = 9510 isa<OMPExecutableDirective>(S) && 9511 isOpenMPTargetExecutionDirective( 9512 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9513 9514 if (RequiresDeviceCodegen) { 9515 const auto &E = *cast<OMPExecutableDirective>(S); 9516 unsigned DeviceID; 9517 unsigned FileID; 9518 unsigned Line; 9519 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9520 FileID, Line); 9521 9522 // Is this a target region that should not be emitted as an entry point? If 9523 // so just signal we are done with this target region. 
9524 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9525 ParentName, Line)) 9526 return; 9527 9528 switch (E.getDirectiveKind()) { 9529 case OMPD_target: 9530 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9531 cast<OMPTargetDirective>(E)); 9532 break; 9533 case OMPD_target_parallel: 9534 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9535 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9536 break; 9537 case OMPD_target_teams: 9538 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9539 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9540 break; 9541 case OMPD_target_teams_distribute: 9542 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9543 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9544 break; 9545 case OMPD_target_teams_distribute_simd: 9546 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9547 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9548 break; 9549 case OMPD_target_parallel_for: 9550 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9551 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9552 break; 9553 case OMPD_target_parallel_for_simd: 9554 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9555 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9556 break; 9557 case OMPD_target_simd: 9558 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9559 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9560 break; 9561 case OMPD_target_teams_distribute_parallel_for: 9562 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9563 CGM, ParentName, 9564 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9565 break; 9566 case OMPD_target_teams_distribute_parallel_for_simd: 9567 CodeGenFunction:: 9568 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9569 CGM, ParentName, 9570 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9571 break; 9572 case OMPD_parallel: 
9573 case OMPD_for: 9574 case OMPD_parallel_for: 9575 case OMPD_parallel_master: 9576 case OMPD_parallel_sections: 9577 case OMPD_for_simd: 9578 case OMPD_parallel_for_simd: 9579 case OMPD_cancel: 9580 case OMPD_cancellation_point: 9581 case OMPD_ordered: 9582 case OMPD_threadprivate: 9583 case OMPD_allocate: 9584 case OMPD_task: 9585 case OMPD_simd: 9586 case OMPD_sections: 9587 case OMPD_section: 9588 case OMPD_single: 9589 case OMPD_master: 9590 case OMPD_critical: 9591 case OMPD_taskyield: 9592 case OMPD_barrier: 9593 case OMPD_taskwait: 9594 case OMPD_taskgroup: 9595 case OMPD_atomic: 9596 case OMPD_flush: 9597 case OMPD_teams: 9598 case OMPD_target_data: 9599 case OMPD_target_exit_data: 9600 case OMPD_target_enter_data: 9601 case OMPD_distribute: 9602 case OMPD_distribute_simd: 9603 case OMPD_distribute_parallel_for: 9604 case OMPD_distribute_parallel_for_simd: 9605 case OMPD_teams_distribute: 9606 case OMPD_teams_distribute_simd: 9607 case OMPD_teams_distribute_parallel_for: 9608 case OMPD_teams_distribute_parallel_for_simd: 9609 case OMPD_target_update: 9610 case OMPD_declare_simd: 9611 case OMPD_declare_variant: 9612 case OMPD_declare_target: 9613 case OMPD_end_declare_target: 9614 case OMPD_declare_reduction: 9615 case OMPD_declare_mapper: 9616 case OMPD_taskloop: 9617 case OMPD_taskloop_simd: 9618 case OMPD_master_taskloop: 9619 case OMPD_master_taskloop_simd: 9620 case OMPD_parallel_master_taskloop: 9621 case OMPD_parallel_master_taskloop_simd: 9622 case OMPD_requires: 9623 case OMPD_unknown: 9624 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9625 } 9626 return; 9627 } 9628 9629 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9630 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9631 return; 9632 9633 scanForTargetRegionsFunctions( 9634 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9635 return; 9636 } 9637 9638 // If this is a lambda function, look into its body. 
9639 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9640 S = L->getBody(); 9641 9642 // Keep looking for target regions recursively. 9643 for (const Stmt *II : S->children()) 9644 scanForTargetRegionsFunctions(II, ParentName); 9645 } 9646 9647 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9648 // If emitting code for the host, we do not process FD here. Instead we do 9649 // the normal code generation. 9650 if (!CGM.getLangOpts().OpenMPIsDevice) { 9651 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9652 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9653 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9654 // Do not emit device_type(nohost) functions for the host. 9655 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9656 return true; 9657 } 9658 return false; 9659 } 9660 9661 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9662 StringRef Name = CGM.getMangledName(GD); 9663 // Try to detect target regions in the function. 9664 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9665 scanForTargetRegionsFunctions(FD->getBody(), Name); 9666 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9667 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9668 // Do not emit device_type(nohost) functions for the host. 9669 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9670 return true; 9671 } 9672 9673 // Do not to emit function if it is not marked as declare target. 9674 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9675 AlreadyEmittedTargetFunctions.count(Name) == 0; 9676 } 9677 9678 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9679 if (!CGM.getLangOpts().OpenMPIsDevice) 9680 return false; 9681 9682 // Check if there are Ctors/Dtors in this declaration and look for target 9683 // regions in it. We use the complete variant to produce the kernel name 9684 // mangling. 
9685 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9686 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9687 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9688 StringRef ParentName = 9689 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9690 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9691 } 9692 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9693 StringRef ParentName = 9694 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9695 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9696 } 9697 } 9698 9699 // Do not to emit variable if it is not marked as declare target. 9700 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9701 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9702 cast<VarDecl>(GD.getDecl())); 9703 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9704 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9705 HasRequiresUnifiedSharedMemory)) { 9706 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9707 return true; 9708 } 9709 return false; 9710 } 9711 9712 llvm::Constant * 9713 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9714 const VarDecl *VD) { 9715 assert(VD->getType().isConstant(CGM.getContext()) && 9716 "Expected constant variable."); 9717 StringRef VarName; 9718 llvm::Constant *Addr; 9719 llvm::GlobalValue::LinkageTypes Linkage; 9720 QualType Ty = VD->getType(); 9721 SmallString<128> Buffer; 9722 { 9723 unsigned DeviceID; 9724 unsigned FileID; 9725 unsigned Line; 9726 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9727 FileID, Line); 9728 llvm::raw_svector_ostream OS(Buffer); 9729 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9730 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9731 VarName = OS.str(); 9732 } 9733 Linkage = llvm::GlobalValue::InternalLinkage; 9734 Addr = 9735 
getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9736 getDefaultFirstprivateAddressSpace()); 9737 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9738 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9739 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9740 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9741 VarName, Addr, VarSize, 9742 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9743 return Addr; 9744 } 9745 9746 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9747 llvm::Constant *Addr) { 9748 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9749 !CGM.getLangOpts().OpenMPIsDevice) 9750 return; 9751 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9752 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9753 if (!Res) { 9754 if (CGM.getLangOpts().OpenMPIsDevice) { 9755 // Register non-target variables being emitted in device code (debug info 9756 // may cause this). 9757 StringRef VarName = CGM.getMangledName(VD); 9758 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9759 } 9760 return; 9761 } 9762 // Register declare target variables. 9763 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9764 StringRef VarName; 9765 CharUnits VarSize; 9766 llvm::GlobalValue::LinkageTypes Linkage; 9767 9768 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9769 !HasRequiresUnifiedSharedMemory) { 9770 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9771 VarName = CGM.getMangledName(VD); 9772 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9773 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9774 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9775 } else { 9776 VarSize = CharUnits::Zero(); 9777 } 9778 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9779 // Temp solution to prevent optimizations of the internal variables. 
9780 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9781 std::string RefName = getName({VarName, "ref"}); 9782 if (!CGM.GetGlobalValue(RefName)) { 9783 llvm::Constant *AddrRef = 9784 getOrCreateInternalVariable(Addr->getType(), RefName); 9785 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9786 GVAddrRef->setConstant(/*Val=*/true); 9787 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9788 GVAddrRef->setInitializer(Addr); 9789 CGM.addCompilerUsedGlobal(GVAddrRef); 9790 } 9791 } 9792 } else { 9793 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9794 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9795 HasRequiresUnifiedSharedMemory)) && 9796 "Declare target attribute must link or to with unified memory."); 9797 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9798 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9799 else 9800 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9801 9802 if (CGM.getLangOpts().OpenMPIsDevice) { 9803 VarName = Addr->getName(); 9804 Addr = nullptr; 9805 } else { 9806 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9807 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9808 } 9809 VarSize = CGM.getPointerSize(); 9810 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9811 } 9812 9813 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9814 VarName, Addr, VarSize, Flags, Linkage); 9815 } 9816 9817 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9818 if (isa<FunctionDecl>(GD.getDecl()) || 9819 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9820 return emitTargetFunctions(GD); 9821 9822 return emitTargetGlobalVariable(GD); 9823 } 9824 9825 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9826 for (const VarDecl *VD : DeferredGlobalVariables) { 9827 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9828 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9829 if (!Res) 9830 continue; 9831 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9832 !HasRequiresUnifiedSharedMemory) { 9833 CGM.EmitGlobal(VD); 9834 } else { 9835 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9836 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9837 HasRequiresUnifiedSharedMemory)) && 9838 "Expected link clause or to clause with unified memory."); 9839 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9840 } 9841 } 9842 } 9843 9844 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9845 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9846 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9847 " Expected target-based directive."); 9848 } 9849 9850 void CGOpenMPRuntime::checkArchForUnifiedAddressing( 9851 const OMPRequiresDecl *D) { 9852 for (const OMPClause *Clause : D->clauselists()) { 9853 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9854 HasRequiresUnifiedSharedMemory = true; 9855 break; 9856 } 9857 } 9858 } 9859 9860 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9861 LangAS &AS) { 9862 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9863 return false; 9864 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9865 switch(A->getAllocatorType()) { 9866 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9867 // Not supported, fallback to the default mem space. 
9868 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9869 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9870 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9871 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9872 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9873 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9874 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9875 AS = LangAS::Default; 9876 return true; 9877 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9878 llvm_unreachable("Expected predefined allocator for the variables with the " 9879 "static storage."); 9880 } 9881 return false; 9882 } 9883 9884 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9885 return HasRequiresUnifiedSharedMemory; 9886 } 9887 9888 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9889 CodeGenModule &CGM) 9890 : CGM(CGM) { 9891 if (CGM.getLangOpts().OpenMPIsDevice) { 9892 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9893 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9894 } 9895 } 9896 9897 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9898 if (CGM.getLangOpts().OpenMPIsDevice) 9899 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9900 } 9901 9902 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9903 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9904 return true; 9905 9906 StringRef Name = CGM.getMangledName(GD); 9907 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9908 // Do not to emit function if it is marked as declare target as it was already 9909 // emitted. 
9910 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9911 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 9912 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 9913 return !F->isDeclaration(); 9914 return false; 9915 } 9916 return true; 9917 } 9918 9919 return !AlreadyEmittedTargetFunctions.insert(Name).second; 9920 } 9921 9922 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9923 // If we don't have entries or if we are emitting code for the device, we 9924 // don't need to do anything. 9925 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9926 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9927 (OffloadEntriesInfoManager.empty() && 9928 !HasEmittedDeclareTargetRegion && 9929 !HasEmittedTargetRegion)) 9930 return nullptr; 9931 9932 // Create and register the function that handles the requires directives. 9933 ASTContext &C = CGM.getContext(); 9934 9935 llvm::Function *RequiresRegFn; 9936 { 9937 CodeGenFunction CGF(CGM); 9938 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 9939 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 9940 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 9941 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 9942 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 9943 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 9944 // TODO: check for other requires clauses. 9945 // The requires directive takes effect only when a target region is 9946 // present in the compilation unit. Otherwise it is ignored and not 9947 // passed to the runtime. This avoids the runtime from throwing an error 9948 // for mismatching requires clauses across compilation units that don't 9949 // contain at least 1 target region. 
9950 assert((HasEmittedTargetRegion || 9951 HasEmittedDeclareTargetRegion || 9952 !OffloadEntriesInfoManager.empty()) && 9953 "Target or declare target region expected."); 9954 if (HasRequiresUnifiedSharedMemory) 9955 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 9956 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 9957 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 9958 CGF.FinishFunction(); 9959 } 9960 return RequiresRegFn; 9961 } 9962 9963 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9964 const OMPExecutableDirective &D, 9965 SourceLocation Loc, 9966 llvm::Function *OutlinedFn, 9967 ArrayRef<llvm::Value *> CapturedVars) { 9968 if (!CGF.HaveInsertPoint()) 9969 return; 9970 9971 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9972 CodeGenFunction::RunCleanupsScope Scope(CGF); 9973 9974 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9975 llvm::Value *Args[] = { 9976 RTLoc, 9977 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9978 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9979 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9980 RealArgs.append(std::begin(Args), std::end(Args)); 9981 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9982 9983 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9984 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9985 } 9986 9987 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9988 const Expr *NumTeams, 9989 const Expr *ThreadLimit, 9990 SourceLocation Loc) { 9991 if (!CGF.HaveInsertPoint()) 9992 return; 9993 9994 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9995 9996 llvm::Value *NumTeamsVal = 9997 NumTeams 9998 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9999 CGF.CGM.Int32Ty, /* isSigned = */ true) 10000 : CGF.Builder.getInt32(0); 10001 10002 llvm::Value *ThreadLimitVal = 10003 ThreadLimit 10004 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10005 CGF.CGM.Int32Ty, /* isSigned = */ true) 10006 : CGF.Builder.getInt32(0); 10007 10008 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10009 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10010 ThreadLimitVal}; 10011 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 10012 PushNumTeamsArgs); 10013 } 10014 10015 void CGOpenMPRuntime::emitTargetDataCalls( 10016 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10017 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10018 if (!CGF.HaveInsertPoint()) 10019 return; 10020 10021 // Action used to replace the default codegen action and turn privatization 10022 // off. 10023 PrePostActionTy NoPrivAction; 10024 10025 // Generate the code for the opening of the data environment. Capture all the 10026 // arguments of the runtime call by reference because they are used in the 10027 // closing of the region. 10028 auto &&BeginThenGen = [this, &D, Device, &Info, 10029 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10030 // Fill up the arrays with all the mapped variables. 10031 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10032 MappableExprsHandler::MapValuesArrayTy Pointers; 10033 MappableExprsHandler::MapValuesArrayTy Sizes; 10034 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10035 10036 // Get map clause information. 10037 MappableExprsHandler MCHandler(D, CGF); 10038 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10039 10040 // Fill up the arrays and create the arguments. 
10041 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10042 10043 llvm::Value *BasePointersArrayArg = nullptr; 10044 llvm::Value *PointersArrayArg = nullptr; 10045 llvm::Value *SizesArrayArg = nullptr; 10046 llvm::Value *MapTypesArrayArg = nullptr; 10047 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10048 SizesArrayArg, MapTypesArrayArg, Info); 10049 10050 // Emit device ID if any. 10051 llvm::Value *DeviceID = nullptr; 10052 if (Device) { 10053 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10054 CGF.Int64Ty, /*isSigned=*/true); 10055 } else { 10056 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10057 } 10058 10059 // Emit the number of elements in the offloading arrays. 10060 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10061 10062 llvm::Value *OffloadingArgs[] = { 10063 DeviceID, PointerNum, BasePointersArrayArg, 10064 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10065 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10066 OffloadingArgs); 10067 10068 // If device pointer privatization is required, emit the body of the region 10069 // here. It will have to be duplicated: with and without privatization. 10070 if (!Info.CaptureDeviceAddrMap.empty()) 10071 CodeGen(CGF); 10072 }; 10073 10074 // Generate code for the closing of the data region. 10075 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10076 PrePostActionTy &) { 10077 assert(Info.isValid() && "Invalid data environment closing arguments."); 10078 10079 llvm::Value *BasePointersArrayArg = nullptr; 10080 llvm::Value *PointersArrayArg = nullptr; 10081 llvm::Value *SizesArrayArg = nullptr; 10082 llvm::Value *MapTypesArrayArg = nullptr; 10083 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10084 SizesArrayArg, MapTypesArrayArg, Info); 10085 10086 // Emit device ID if any. 
10087 llvm::Value *DeviceID = nullptr; 10088 if (Device) { 10089 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10090 CGF.Int64Ty, /*isSigned=*/true); 10091 } else { 10092 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10093 } 10094 10095 // Emit the number of elements in the offloading arrays. 10096 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10097 10098 llvm::Value *OffloadingArgs[] = { 10099 DeviceID, PointerNum, BasePointersArrayArg, 10100 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10101 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10102 OffloadingArgs); 10103 }; 10104 10105 // If we need device pointer privatization, we need to emit the body of the 10106 // region with no privatization in the 'else' branch of the conditional. 10107 // Otherwise, we don't have to do anything. 10108 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10109 PrePostActionTy &) { 10110 if (!Info.CaptureDeviceAddrMap.empty()) { 10111 CodeGen.setAction(NoPrivAction); 10112 CodeGen(CGF); 10113 } 10114 }; 10115 10116 // We don't have to do anything to close the region if the if clause evaluates 10117 // to false. 10118 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10119 10120 if (IfCond) { 10121 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10122 } else { 10123 RegionCodeGenTy RCG(BeginThenGen); 10124 RCG(CGF); 10125 } 10126 10127 // If we don't require privatization of device pointers, we emit the body in 10128 // between the runtime calls. This avoids duplicating the body code. 
10129 if (Info.CaptureDeviceAddrMap.empty()) { 10130 CodeGen.setAction(NoPrivAction); 10131 CodeGen(CGF); 10132 } 10133 10134 if (IfCond) { 10135 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10136 } else { 10137 RegionCodeGenTy RCG(EndThenGen); 10138 RCG(CGF); 10139 } 10140 } 10141 10142 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10143 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10144 const Expr *Device) { 10145 if (!CGF.HaveInsertPoint()) 10146 return; 10147 10148 assert((isa<OMPTargetEnterDataDirective>(D) || 10149 isa<OMPTargetExitDataDirective>(D) || 10150 isa<OMPTargetUpdateDirective>(D)) && 10151 "Expecting either target enter, exit data, or update directives."); 10152 10153 CodeGenFunction::OMPTargetDataInfo InputInfo; 10154 llvm::Value *MapTypesArray = nullptr; 10155 // Generate the code for the opening of the data environment. 10156 auto &&ThenGen = [this, &D, Device, &InputInfo, 10157 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10158 // Emit device ID if any. 10159 llvm::Value *DeviceID = nullptr; 10160 if (Device) { 10161 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10162 CGF.Int64Ty, /*isSigned=*/true); 10163 } else { 10164 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10165 } 10166 10167 // Emit the number of elements in the offloading arrays. 10168 llvm::Constant *PointerNum = 10169 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10170 10171 llvm::Value *OffloadingArgs[] = {DeviceID, 10172 PointerNum, 10173 InputInfo.BasePointersArray.getPointer(), 10174 InputInfo.PointersArray.getPointer(), 10175 InputInfo.SizesArray.getPointer(), 10176 MapTypesArray}; 10177 10178 // Select the right runtime function call for each expected standalone 10179 // directive. 10180 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10181 OpenMPRTLFunction RTLFn; 10182 switch (D.getDirectiveKind()) { 10183 case OMPD_target_enter_data: 10184 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_begin_nowait 10185 : OMPRTL__tgt_target_data_begin; 10186 break; 10187 case OMPD_target_exit_data: 10188 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10189 : OMPRTL__tgt_target_data_end; 10190 break; 10191 case OMPD_target_update: 10192 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10193 : OMPRTL__tgt_target_data_update; 10194 break; 10195 case OMPD_parallel: 10196 case OMPD_for: 10197 case OMPD_parallel_for: 10198 case OMPD_parallel_master: 10199 case OMPD_parallel_sections: 10200 case OMPD_for_simd: 10201 case OMPD_parallel_for_simd: 10202 case OMPD_cancel: 10203 case OMPD_cancellation_point: 10204 case OMPD_ordered: 10205 case OMPD_threadprivate: 10206 case OMPD_allocate: 10207 case OMPD_task: 10208 case OMPD_simd: 10209 case OMPD_sections: 10210 case OMPD_section: 10211 case OMPD_single: 10212 case OMPD_master: 10213 case OMPD_critical: 10214 case OMPD_taskyield: 10215 case OMPD_barrier: 10216 case OMPD_taskwait: 10217 case OMPD_taskgroup: 10218 case OMPD_atomic: 10219 case OMPD_flush: 10220 case OMPD_teams: 10221 case OMPD_target_data: 10222 case OMPD_distribute: 10223 case OMPD_distribute_simd: 10224 case OMPD_distribute_parallel_for: 10225 case OMPD_distribute_parallel_for_simd: 10226 case OMPD_teams_distribute: 10227 case OMPD_teams_distribute_simd: 10228 case OMPD_teams_distribute_parallel_for: 10229 case OMPD_teams_distribute_parallel_for_simd: 10230 case OMPD_declare_simd: 10231 case OMPD_declare_variant: 10232 case OMPD_declare_target: 10233 case OMPD_end_declare_target: 10234 case OMPD_declare_reduction: 10235 case OMPD_declare_mapper: 10236 case OMPD_taskloop: 10237 case OMPD_taskloop_simd: 10238 case OMPD_master_taskloop: 10239 case OMPD_master_taskloop_simd: 10240 case OMPD_parallel_master_taskloop: 10241 case OMPD_parallel_master_taskloop_simd: 10242 case OMPD_target: 10243 case OMPD_target_simd: 10244 case OMPD_target_teams_distribute: 10245 case OMPD_target_teams_distribute_simd: 10246 case 
OMPD_target_teams_distribute_parallel_for: 10247 case OMPD_target_teams_distribute_parallel_for_simd: 10248 case OMPD_target_teams: 10249 case OMPD_target_parallel: 10250 case OMPD_target_parallel_for: 10251 case OMPD_target_parallel_for_simd: 10252 case OMPD_requires: 10253 case OMPD_unknown: 10254 llvm_unreachable("Unexpected standalone target data directive."); 10255 break; 10256 } 10257 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10258 }; 10259 10260 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10261 CodeGenFunction &CGF, PrePostActionTy &) { 10262 // Fill up the arrays with all the mapped variables. 10263 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10264 MappableExprsHandler::MapValuesArrayTy Pointers; 10265 MappableExprsHandler::MapValuesArrayTy Sizes; 10266 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10267 10268 // Get map clause information. 10269 MappableExprsHandler MEHandler(D, CGF); 10270 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10271 10272 TargetDataInfo Info; 10273 // Fill up the arrays and create the arguments. 
10274 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10275 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10276 Info.PointersArray, Info.SizesArray, 10277 Info.MapTypesArray, Info); 10278 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10279 InputInfo.BasePointersArray = 10280 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10281 InputInfo.PointersArray = 10282 Address(Info.PointersArray, CGM.getPointerAlign()); 10283 InputInfo.SizesArray = 10284 Address(Info.SizesArray, CGM.getPointerAlign()); 10285 MapTypesArray = Info.MapTypesArray; 10286 if (D.hasClausesOfKind<OMPDependClause>()) 10287 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10288 else 10289 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10290 }; 10291 10292 if (IfCond) { 10293 emitIfClause(CGF, IfCond, TargetThenGen, 10294 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10295 } else { 10296 RegionCodeGenTy ThenRCG(TargetThenGen); 10297 ThenRCG(CGF); 10298 } 10299 } 10300 10301 namespace { 10302 /// Kind of parameter in a function with 'declare simd' directive. 10303 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10304 /// Attribute set of the parameter. 10305 struct ParamAttrTy { 10306 ParamKindTy Kind = Vector; 10307 llvm::APSInt StrideOrArg; 10308 llvm::APSInt Alignment; 10309 }; 10310 } // namespace 10311 10312 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10313 ArrayRef<ParamAttrTy> ParamAttrs) { 10314 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10315 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10316 // of that clause. The VLEN value must be power of 2. 10317 // In other case the notion of the function`s "characteristic data type" (CDT) 10318 // is used to compute the vector length. 10319 // CDT is defined in the following order: 10320 // a) For non-void function, the CDT is the return type. 
10321 // b) If the function has any non-uniform, non-linear parameters, then the 10322 // CDT is the type of the first such parameter. 10323 // c) If the CDT determined by a) or b) above is struct, union, or class 10324 // type which is pass-by-value (except for the type that maps to the 10325 // built-in complex data type), the characteristic data type is int. 10326 // d) If none of the above three cases is applicable, the CDT is int. 10327 // The VLEN is then determined based on the CDT and the size of vector 10328 // register of that ISA for which current vector version is generated. The 10329 // VLEN is computed using the formula below: 10330 // VLEN = sizeof(vector_register) / sizeof(CDT), 10331 // where vector register size specified in section 3.2.1 Registers and the 10332 // Stack Frame of original AMD64 ABI document. 10333 QualType RetType = FD->getReturnType(); 10334 if (RetType.isNull()) 10335 return 0; 10336 ASTContext &C = FD->getASTContext(); 10337 QualType CDT; 10338 if (!RetType.isNull() && !RetType->isVoidType()) { 10339 CDT = RetType; 10340 } else { 10341 unsigned Offset = 0; 10342 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10343 if (ParamAttrs[Offset].Kind == Vector) 10344 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10345 ++Offset; 10346 } 10347 if (CDT.isNull()) { 10348 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10349 if (ParamAttrs[I + Offset].Kind == Vector) { 10350 CDT = FD->getParamDecl(I)->getType(); 10351 break; 10352 } 10353 } 10354 } 10355 } 10356 if (CDT.isNull()) 10357 CDT = C.IntTy; 10358 CDT = CDT->getCanonicalTypeUnqualified(); 10359 if (CDT->isRecordType() || CDT->isUnionType()) 10360 CDT = C.IntTy; 10361 return C.getTypeSize(CDT); 10362 } 10363 10364 static void 10365 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10366 const llvm::APSInt &VLENVal, 10367 ArrayRef<ParamAttrTy> ParamAttrs, 10368 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10369 struct ISADataTy { 
10370 char ISA; 10371 unsigned VecRegSize; 10372 }; 10373 ISADataTy ISAData[] = { 10374 { 10375 'b', 128 10376 }, // SSE 10377 { 10378 'c', 256 10379 }, // AVX 10380 { 10381 'd', 256 10382 }, // AVX2 10383 { 10384 'e', 512 10385 }, // AVX512 10386 }; 10387 llvm::SmallVector<char, 2> Masked; 10388 switch (State) { 10389 case OMPDeclareSimdDeclAttr::BS_Undefined: 10390 Masked.push_back('N'); 10391 Masked.push_back('M'); 10392 break; 10393 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10394 Masked.push_back('N'); 10395 break; 10396 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10397 Masked.push_back('M'); 10398 break; 10399 } 10400 for (char Mask : Masked) { 10401 for (const ISADataTy &Data : ISAData) { 10402 SmallString<256> Buffer; 10403 llvm::raw_svector_ostream Out(Buffer); 10404 Out << "_ZGV" << Data.ISA << Mask; 10405 if (!VLENVal) { 10406 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10407 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10408 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10409 } else { 10410 Out << VLENVal; 10411 } 10412 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10413 switch (ParamAttr.Kind){ 10414 case LinearWithVarStride: 10415 Out << 's' << ParamAttr.StrideOrArg; 10416 break; 10417 case Linear: 10418 Out << 'l'; 10419 if (!!ParamAttr.StrideOrArg) 10420 Out << ParamAttr.StrideOrArg; 10421 break; 10422 case Uniform: 10423 Out << 'u'; 10424 break; 10425 case Vector: 10426 Out << 'v'; 10427 break; 10428 } 10429 if (!!ParamAttr.Alignment) 10430 Out << 'a' << ParamAttr.Alignment; 10431 } 10432 Out << '_' << Fn->getName(); 10433 Fn->addFnAttr(Out.str()); 10434 } 10435 } 10436 } 10437 10438 // This are the Functions that are needed to mangle the name of the 10439 // vector functions generated by the compiler, according to the rules 10440 // defined in the "Vector Function ABI specifications for AArch64", 10441 // available at 10442 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10443 10444 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10445 /// 10446 /// TODO: Need to implement the behavior for reference marked with a 10447 /// var or no linear modifiers (1.b in the section). For this, we 10448 /// need to extend ParamKindTy to support the linear modifiers. 10449 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10450 QT = QT.getCanonicalType(); 10451 10452 if (QT->isVoidType()) 10453 return false; 10454 10455 if (Kind == ParamKindTy::Uniform) 10456 return false; 10457 10458 if (Kind == ParamKindTy::Linear) 10459 return false; 10460 10461 // TODO: Handle linear references with modifiers 10462 10463 if (Kind == ParamKindTy::LinearWithVarStride) 10464 return false; 10465 10466 return true; 10467 } 10468 10469 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10470 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10471 QT = QT.getCanonicalType(); 10472 unsigned Size = C.getTypeSize(QT); 10473 10474 // Only scalars and complex within 16 bytes wide set PVB to true. 10475 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10476 return false; 10477 10478 if (QT->isFloatingType()) 10479 return true; 10480 10481 if (QT->isIntegerType()) 10482 return true; 10483 10484 if (QT->isPointerType()) 10485 return true; 10486 10487 // TODO: Add support for complex types (section 3.1.2, item 2). 10488 10489 return false; 10490 } 10491 10492 /// Computes the lane size (LS) of a return type or of an input parameter, 10493 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10494 /// TODO: Add support for references, section 3.2.1, item 1. 
10495 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10496 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10497 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10498 if (getAArch64PBV(PTy, C)) 10499 return C.getTypeSize(PTy); 10500 } 10501 if (getAArch64PBV(QT, C)) 10502 return C.getTypeSize(QT); 10503 10504 return C.getTypeSize(C.getUIntPtrType()); 10505 } 10506 10507 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10508 // signature of the scalar function, as defined in 3.2.2 of the 10509 // AAVFABI. 10510 static std::tuple<unsigned, unsigned, bool> 10511 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10512 QualType RetType = FD->getReturnType().getCanonicalType(); 10513 10514 ASTContext &C = FD->getASTContext(); 10515 10516 bool OutputBecomesInput = false; 10517 10518 llvm::SmallVector<unsigned, 8> Sizes; 10519 if (!RetType->isVoidType()) { 10520 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10521 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10522 OutputBecomesInput = true; 10523 } 10524 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10525 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10526 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10527 } 10528 10529 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10530 // The LS of a function parameter / return value can only be a power 10531 // of 2, starting from 8 bits, up to 128. 
10532 assert(std::all_of(Sizes.begin(), Sizes.end(), 10533 [](unsigned Size) { 10534 return Size == 8 || Size == 16 || Size == 32 || 10535 Size == 64 || Size == 128; 10536 }) && 10537 "Invalid size"); 10538 10539 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10540 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10541 OutputBecomesInput); 10542 } 10543 10544 /// Mangle the parameter part of the vector function name according to 10545 /// their OpenMP classification. The mangling function is defined in 10546 /// section 3.5 of the AAVFABI. 10547 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10548 SmallString<256> Buffer; 10549 llvm::raw_svector_ostream Out(Buffer); 10550 for (const auto &ParamAttr : ParamAttrs) { 10551 switch (ParamAttr.Kind) { 10552 case LinearWithVarStride: 10553 Out << "ls" << ParamAttr.StrideOrArg; 10554 break; 10555 case Linear: 10556 Out << 'l'; 10557 // Don't print the step value if it is not present or if it is 10558 // equal to 1. 10559 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10560 Out << ParamAttr.StrideOrArg; 10561 break; 10562 case Uniform: 10563 Out << 'u'; 10564 break; 10565 case Vector: 10566 Out << 'v'; 10567 break; 10568 } 10569 10570 if (!!ParamAttr.Alignment) 10571 Out << 'a' << ParamAttr.Alignment; 10572 } 10573 10574 return Out.str(); 10575 } 10576 10577 // Function used to add the attribute. The parameter `VLEN` is 10578 // templated to allow the use of "x" when targeting scalable functions 10579 // for SVE. 
10580 template <typename T> 10581 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10582 char ISA, StringRef ParSeq, 10583 StringRef MangledName, bool OutputBecomesInput, 10584 llvm::Function *Fn) { 10585 SmallString<256> Buffer; 10586 llvm::raw_svector_ostream Out(Buffer); 10587 Out << Prefix << ISA << LMask << VLEN; 10588 if (OutputBecomesInput) 10589 Out << "v"; 10590 Out << ParSeq << "_" << MangledName; 10591 Fn->addFnAttr(Out.str()); 10592 } 10593 10594 // Helper function to generate the Advanced SIMD names depending on 10595 // the value of the NDS when simdlen is not present. 10596 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10597 StringRef Prefix, char ISA, 10598 StringRef ParSeq, StringRef MangledName, 10599 bool OutputBecomesInput, 10600 llvm::Function *Fn) { 10601 switch (NDS) { 10602 case 8: 10603 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10604 OutputBecomesInput, Fn); 10605 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10606 OutputBecomesInput, Fn); 10607 break; 10608 case 16: 10609 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10610 OutputBecomesInput, Fn); 10611 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10612 OutputBecomesInput, Fn); 10613 break; 10614 case 32: 10615 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10616 OutputBecomesInput, Fn); 10617 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10618 OutputBecomesInput, Fn); 10619 break; 10620 case 64: 10621 case 128: 10622 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10623 OutputBecomesInput, Fn); 10624 break; 10625 default: 10626 llvm_unreachable("Scalar type is too wide."); 10627 } 10628 } 10629 10630 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10631 static void emitAArch64DeclareSimdFunction( 10632 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10633 ArrayRef<ParamAttrTy> ParamAttrs, 10634 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10635 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10636 10637 // Get basic data for building the vector signature. 10638 const auto Data = getNDSWDS(FD, ParamAttrs); 10639 const unsigned NDS = std::get<0>(Data); 10640 const unsigned WDS = std::get<1>(Data); 10641 const bool OutputBecomesInput = std::get<2>(Data); 10642 10643 // Check the values provided via `simdlen` by the user. 10644 // 1. A `simdlen(1)` doesn't produce vector signatures, 10645 if (UserVLEN == 1) { 10646 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10647 DiagnosticsEngine::Warning, 10648 "The clause simdlen(1) has no effect when targeting aarch64."); 10649 CGM.getDiags().Report(SLoc, DiagID); 10650 return; 10651 } 10652 10653 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10654 // Advanced SIMD output. 10655 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10656 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10657 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10658 "power of 2 when targeting Advanced SIMD."); 10659 CGM.getDiags().Report(SLoc, DiagID); 10660 return; 10661 } 10662 10663 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10664 // limits. 10665 if (ISA == 's' && UserVLEN != 0) { 10666 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10667 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10668 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10669 "lanes in the architectural constraints " 10670 "for SVE (min is 128-bit, max is " 10671 "2048-bit, by steps of 128-bit)"); 10672 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10673 return; 10674 } 10675 } 10676 10677 // Sort out parameter sequence. 
10678 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10679 StringRef Prefix = "_ZGV"; 10680 // Generate simdlen from user input (if any). 10681 if (UserVLEN) { 10682 if (ISA == 's') { 10683 // SVE generates only a masked function. 10684 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10685 OutputBecomesInput, Fn); 10686 } else { 10687 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10688 // Advanced SIMD generates one or two functions, depending on 10689 // the `[not]inbranch` clause. 10690 switch (State) { 10691 case OMPDeclareSimdDeclAttr::BS_Undefined: 10692 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10693 OutputBecomesInput, Fn); 10694 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10695 OutputBecomesInput, Fn); 10696 break; 10697 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10698 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10699 OutputBecomesInput, Fn); 10700 break; 10701 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10702 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10703 OutputBecomesInput, Fn); 10704 break; 10705 } 10706 } 10707 } else { 10708 // If no user simdlen is provided, follow the AAVFABI rules for 10709 // generating the vector length. 10710 if (ISA == 's') { 10711 // SVE, section 3.4.1, item 1. 10712 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10713 OutputBecomesInput, Fn); 10714 } else { 10715 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10716 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10717 // two vector names depending on the use of the clause 10718 // `[not]inbranch`. 
10719 switch (State) { 10720 case OMPDeclareSimdDeclAttr::BS_Undefined: 10721 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10722 OutputBecomesInput, Fn); 10723 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10724 OutputBecomesInput, Fn); 10725 break; 10726 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10727 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10728 OutputBecomesInput, Fn); 10729 break; 10730 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10731 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10732 OutputBecomesInput, Fn); 10733 break; 10734 } 10735 } 10736 } 10737 } 10738 10739 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10740 llvm::Function *Fn) { 10741 ASTContext &C = CGM.getContext(); 10742 FD = FD->getMostRecentDecl(); 10743 // Map params to their positions in function decl. 10744 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10745 if (isa<CXXMethodDecl>(FD)) 10746 ParamPositions.try_emplace(FD, 0); 10747 unsigned ParamPos = ParamPositions.size(); 10748 for (const ParmVarDecl *P : FD->parameters()) { 10749 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10750 ++ParamPos; 10751 } 10752 while (FD) { 10753 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10754 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10755 // Mark uniform parameters. 10756 for (const Expr *E : Attr->uniforms()) { 10757 E = E->IgnoreParenImpCasts(); 10758 unsigned Pos; 10759 if (isa<CXXThisExpr>(E)) { 10760 Pos = ParamPositions[FD]; 10761 } else { 10762 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10763 ->getCanonicalDecl(); 10764 Pos = ParamPositions[PVD]; 10765 } 10766 ParamAttrs[Pos].Kind = Uniform; 10767 } 10768 // Get alignment info. 
10769 auto NI = Attr->alignments_begin(); 10770 for (const Expr *E : Attr->aligneds()) { 10771 E = E->IgnoreParenImpCasts(); 10772 unsigned Pos; 10773 QualType ParmTy; 10774 if (isa<CXXThisExpr>(E)) { 10775 Pos = ParamPositions[FD]; 10776 ParmTy = E->getType(); 10777 } else { 10778 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10779 ->getCanonicalDecl(); 10780 Pos = ParamPositions[PVD]; 10781 ParmTy = PVD->getType(); 10782 } 10783 ParamAttrs[Pos].Alignment = 10784 (*NI) 10785 ? (*NI)->EvaluateKnownConstInt(C) 10786 : llvm::APSInt::getUnsigned( 10787 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10788 .getQuantity()); 10789 ++NI; 10790 } 10791 // Mark linear parameters. 10792 auto SI = Attr->steps_begin(); 10793 auto MI = Attr->modifiers_begin(); 10794 for (const Expr *E : Attr->linears()) { 10795 E = E->IgnoreParenImpCasts(); 10796 unsigned Pos; 10797 if (isa<CXXThisExpr>(E)) { 10798 Pos = ParamPositions[FD]; 10799 } else { 10800 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10801 ->getCanonicalDecl(); 10802 Pos = ParamPositions[PVD]; 10803 } 10804 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10805 ParamAttr.Kind = Linear; 10806 if (*SI) { 10807 Expr::EvalResult Result; 10808 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10809 if (const auto *DRE = 10810 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10811 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10812 ParamAttr.Kind = LinearWithVarStride; 10813 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10814 ParamPositions[StridePVD->getCanonicalDecl()]); 10815 } 10816 } 10817 } else { 10818 ParamAttr.StrideOrArg = Result.Val.getInt(); 10819 } 10820 } 10821 ++SI; 10822 ++MI; 10823 } 10824 llvm::APSInt VLENVal; 10825 SourceLocation ExprLoc; 10826 const Expr *VLENExpr = Attr->getSimdlen(); 10827 if (VLENExpr) { 10828 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10829 ExprLoc = VLENExpr->getExprLoc(); 10830 } 10831 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10832 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10833 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10834 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10835 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10836 unsigned VLEN = VLENVal.getExtValue(); 10837 StringRef MangledName = Fn->getName(); 10838 if (CGM.getTarget().hasFeature("sve")) 10839 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10840 MangledName, 's', 128, Fn, ExprLoc); 10841 if (CGM.getTarget().hasFeature("neon")) 10842 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10843 MangledName, 'n', 128, Fn, ExprLoc); 10844 } 10845 } 10846 FD = FD->getPreviousDecl(); 10847 } 10848 } 10849 10850 namespace { 10851 /// Cleanup action for doacross support. 10852 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10853 public: 10854 static const int DoacrossFinArgs = 2; 10855 10856 private: 10857 llvm::FunctionCallee RTLFn; 10858 llvm::Value *Args[DoacrossFinArgs]; 10859 10860 public: 10861 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10862 ArrayRef<llvm::Value *> CallArgs) 10863 : RTLFn(RTLFn) { 10864 assert(CallArgs.size() == DoacrossFinArgs); 10865 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10866 } 10867 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10868 if (!CGF.HaveInsertPoint()) 10869 return; 10870 CGF.EmitRuntimeCall(RTLFn, Args); 10871 } 10872 }; 10873 } // namespace 10874 10875 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10876 const OMPLoopDirective &D, 10877 ArrayRef<Expr *> NumIterations) { 10878 if (!CGF.HaveInsertPoint()) 10879 return; 10880 10881 ASTContext &C = CGM.getContext(); 10882 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10883 RecordDecl *RD; 10884 if (KmpDimTy.isNull()) { 10885 // Build struct kmp_dim { // loop bounds info casted to 
kmp_int64 10886 // kmp_int64 lo; // lower 10887 // kmp_int64 up; // upper 10888 // kmp_int64 st; // stride 10889 // }; 10890 RD = C.buildImplicitRecord("kmp_dim"); 10891 RD->startDefinition(); 10892 addFieldToRecordDecl(C, RD, Int64Ty); 10893 addFieldToRecordDecl(C, RD, Int64Ty); 10894 addFieldToRecordDecl(C, RD, Int64Ty); 10895 RD->completeDefinition(); 10896 KmpDimTy = C.getRecordType(RD); 10897 } else { 10898 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10899 } 10900 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10901 QualType ArrayTy = 10902 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 10903 10904 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10905 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10906 enum { LowerFD = 0, UpperFD, StrideFD }; 10907 // Fill dims with data. 10908 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10909 LValue DimsLVal = CGF.MakeAddrLValue( 10910 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10911 // dims.upper = num_iterations; 10912 LValue UpperLVal = CGF.EmitLValueForField( 10913 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10914 llvm::Value *NumIterVal = 10915 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10916 D.getNumIterations()->getType(), Int64Ty, 10917 D.getNumIterations()->getExprLoc()); 10918 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10919 // dims.stride = 1; 10920 LValue StrideLVal = CGF.EmitLValueForField( 10921 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10922 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10923 StrideLVal); 10924 } 10925 10926 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10927 // kmp_int32 num_dims, struct kmp_dim * dims); 10928 llvm::Value *Args[] = { 10929 emitUpdateLocation(CGF, D.getBeginLoc()), 10930 getThreadID(CGF, D.getBeginLoc()), 10931 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10932 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10933 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10934 CGM.VoidPtrTy)}; 10935 10936 llvm::FunctionCallee RTLFn = 10937 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10938 CGF.EmitRuntimeCall(RTLFn, Args); 10939 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10940 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10941 llvm::FunctionCallee FiniRTLFn = 10942 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10943 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10944 llvm::makeArrayRef(FiniArgs)); 10945 } 10946 10947 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10948 const OMPDependClause *C) { 10949 QualType Int64Ty = 10950 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10951 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10952 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10953 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 10954 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10955 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10956 const Expr *CounterVal = C->getLoopData(I); 10957 assert(CounterVal); 10958 llvm::Value *CntVal = CGF.EmitScalarConversion( 10959 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10960 CounterVal->getExprLoc()); 10961 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10962 /*Volatile=*/false, Int64Ty); 10963 } 10964 llvm::Value *Args[] = { 10965 emitUpdateLocation(CGF, C->getBeginLoc()), 10966 getThreadID(CGF, C->getBeginLoc()), 10967 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10968 llvm::FunctionCallee RTLFn; 10969 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10970 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10971 } else { 10972 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10973 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 
10974 } 10975 CGF.EmitRuntimeCall(RTLFn, Args); 10976 } 10977 10978 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10979 llvm::FunctionCallee Callee, 10980 ArrayRef<llvm::Value *> Args) const { 10981 assert(Loc.isValid() && "Outlined function call location must be valid."); 10982 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10983 10984 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10985 if (Fn->doesNotThrow()) { 10986 CGF.EmitNounwindRuntimeCall(Fn, Args); 10987 return; 10988 } 10989 } 10990 CGF.EmitRuntimeCall(Callee, Args); 10991 } 10992 10993 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10994 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10995 ArrayRef<llvm::Value *> Args) const { 10996 emitCall(CGF, Loc, OutlinedFn, Args); 10997 } 10998 10999 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 11000 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 11001 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 11002 HasEmittedDeclareTargetRegion = true; 11003 } 11004 11005 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 11006 const VarDecl *NativeParam, 11007 const VarDecl *TargetParam) const { 11008 return CGF.GetAddrOfLocalVar(NativeParam); 11009 } 11010 11011 namespace { 11012 /// Cleanup action for allocate support. 
11013 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 11014 public: 11015 static const int CleanupArgs = 3; 11016 11017 private: 11018 llvm::FunctionCallee RTLFn; 11019 llvm::Value *Args[CleanupArgs]; 11020 11021 public: 11022 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 11023 ArrayRef<llvm::Value *> CallArgs) 11024 : RTLFn(RTLFn) { 11025 assert(CallArgs.size() == CleanupArgs && 11026 "Size of arguments does not match."); 11027 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 11028 } 11029 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 11030 if (!CGF.HaveInsertPoint()) 11031 return; 11032 CGF.EmitRuntimeCall(RTLFn, Args); 11033 } 11034 }; 11035 } // namespace 11036 11037 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 11038 const VarDecl *VD) { 11039 if (!VD) 11040 return Address::invalid(); 11041 const VarDecl *CVD = VD->getCanonicalDecl(); 11042 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 11043 return Address::invalid(); 11044 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 11045 // Use the default allocation. 
11046 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 11047 !AA->getAllocator()) 11048 return Address::invalid(); 11049 llvm::Value *Size; 11050 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 11051 if (CVD->getType()->isVariablyModifiedType()) { 11052 Size = CGF.getTypeSize(CVD->getType()); 11053 // Align the size: ((size + align - 1) / align) * align 11054 Size = CGF.Builder.CreateNUWAdd( 11055 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 11056 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 11057 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 11058 } else { 11059 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 11060 Size = CGM.getSize(Sz.alignTo(Align)); 11061 } 11062 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11063 assert(AA->getAllocator() && 11064 "Expected allocator expression for non-default allocator."); 11065 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11066 // According to the standard, the original allocator type is a enum (integer). 11067 // Convert to pointer type, if required. 
11068 if (Allocator->getType()->isIntegerTy()) 11069 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11070 else if (Allocator->getType()->isPointerTy()) 11071 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11072 CGM.VoidPtrTy); 11073 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11074 11075 llvm::Value *Addr = 11076 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11077 CVD->getName() + ".void.addr"); 11078 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11079 Allocator}; 11080 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11081 11082 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11083 llvm::makeArrayRef(FiniArgs)); 11084 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11085 Addr, 11086 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11087 CVD->getName() + ".addr"); 11088 return Address(Addr, Align); 11089 } 11090 11091 namespace { 11092 using OMPContextSelectorData = 11093 OpenMPCtxSelectorData<ArrayRef<StringRef>, llvm::APSInt>; 11094 using CompleteOMPContextSelectorData = SmallVector<OMPContextSelectorData, 4>; 11095 } // anonymous namespace 11096 11097 /// Checks current context and returns true if it matches the context selector. 11098 template <OpenMPContextSelectorSetKind CtxSet, OpenMPContextSelectorKind Ctx, 11099 typename... Arguments> 11100 static bool checkContext(const OMPContextSelectorData &Data, 11101 Arguments... Params) { 11102 assert(Data.CtxSet != OMP_CTX_SET_unknown && Data.Ctx != OMP_CTX_unknown && 11103 "Unknown context selector or context selector set."); 11104 return false; 11105 } 11106 11107 /// Checks for implementation={vendor(<vendor>)} context selector. 11108 /// \returns true iff <vendor>="llvm", false otherwise. 
11109 template <> 11110 bool checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>( 11111 const OMPContextSelectorData &Data) { 11112 return llvm::all_of(Data.Names, 11113 [](StringRef S) { return !S.compare_lower("llvm"); }); 11114 } 11115 11116 /// Checks for device={kind(<kind>)} context selector. 11117 /// \returns true if <kind>="host" and compilation is for host. 11118 /// true if <kind>="nohost" and compilation is for device. 11119 /// true if <kind>="cpu" and compilation is for Arm, X86 or PPC CPU. 11120 /// true if <kind>="gpu" and compilation is for NVPTX or AMDGCN. 11121 /// false otherwise. 11122 template <> 11123 bool checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>( 11124 const OMPContextSelectorData &Data, CodeGenModule &CGM) { 11125 for (StringRef Name : Data.Names) { 11126 if (!Name.compare_lower("host")) { 11127 if (CGM.getLangOpts().OpenMPIsDevice) 11128 return false; 11129 continue; 11130 } 11131 if (!Name.compare_lower("nohost")) { 11132 if (!CGM.getLangOpts().OpenMPIsDevice) 11133 return false; 11134 continue; 11135 } 11136 switch (CGM.getTriple().getArch()) { 11137 case llvm::Triple::arm: 11138 case llvm::Triple::armeb: 11139 case llvm::Triple::aarch64: 11140 case llvm::Triple::aarch64_be: 11141 case llvm::Triple::aarch64_32: 11142 case llvm::Triple::ppc: 11143 case llvm::Triple::ppc64: 11144 case llvm::Triple::ppc64le: 11145 case llvm::Triple::x86: 11146 case llvm::Triple::x86_64: 11147 if (Name.compare_lower("cpu")) 11148 return false; 11149 break; 11150 case llvm::Triple::amdgcn: 11151 case llvm::Triple::nvptx: 11152 case llvm::Triple::nvptx64: 11153 if (Name.compare_lower("gpu")) 11154 return false; 11155 break; 11156 case llvm::Triple::UnknownArch: 11157 case llvm::Triple::arc: 11158 case llvm::Triple::avr: 11159 case llvm::Triple::bpfel: 11160 case llvm::Triple::bpfeb: 11161 case llvm::Triple::hexagon: 11162 case llvm::Triple::mips: 11163 case llvm::Triple::mipsel: 11164 case llvm::Triple::mips64: 11165 case 
llvm::Triple::mips64el: 11166 case llvm::Triple::msp430: 11167 case llvm::Triple::r600: 11168 case llvm::Triple::riscv32: 11169 case llvm::Triple::riscv64: 11170 case llvm::Triple::sparc: 11171 case llvm::Triple::sparcv9: 11172 case llvm::Triple::sparcel: 11173 case llvm::Triple::systemz: 11174 case llvm::Triple::tce: 11175 case llvm::Triple::tcele: 11176 case llvm::Triple::thumb: 11177 case llvm::Triple::thumbeb: 11178 case llvm::Triple::xcore: 11179 case llvm::Triple::le32: 11180 case llvm::Triple::le64: 11181 case llvm::Triple::amdil: 11182 case llvm::Triple::amdil64: 11183 case llvm::Triple::hsail: 11184 case llvm::Triple::hsail64: 11185 case llvm::Triple::spir: 11186 case llvm::Triple::spir64: 11187 case llvm::Triple::kalimba: 11188 case llvm::Triple::shave: 11189 case llvm::Triple::lanai: 11190 case llvm::Triple::wasm32: 11191 case llvm::Triple::wasm64: 11192 case llvm::Triple::renderscript32: 11193 case llvm::Triple::renderscript64: 11194 return false; 11195 } 11196 } 11197 return true; 11198 } 11199 11200 bool matchesContext(CodeGenModule &CGM, 11201 const CompleteOMPContextSelectorData &ContextData) { 11202 for (const OMPContextSelectorData &Data : ContextData) { 11203 switch (Data.Ctx) { 11204 case OMP_CTX_vendor: 11205 assert(Data.CtxSet == OMP_CTX_SET_implementation && 11206 "Expected implementation context selector set."); 11207 if (!checkContext<OMP_CTX_SET_implementation, OMP_CTX_vendor>(Data)) 11208 return false; 11209 break; 11210 case OMP_CTX_kind: 11211 assert(Data.CtxSet == OMP_CTX_SET_device && 11212 "Expected device context selector set."); 11213 if (!checkContext<OMP_CTX_SET_device, OMP_CTX_kind, CodeGenModule &>(Data, 11214 CGM)) 11215 return false; 11216 break; 11217 case OMP_CTX_unknown: 11218 llvm_unreachable("Unknown context selector kind."); 11219 } 11220 } 11221 return true; 11222 } 11223 11224 static CompleteOMPContextSelectorData 11225 translateAttrToContextSelectorData(ASTContext &C, 11226 const OMPDeclareVariantAttr *A) { 11227 
CompleteOMPContextSelectorData Data; 11228 for (unsigned I = 0, E = A->scores_size(); I < E; ++I) { 11229 Data.emplace_back(); 11230 auto CtxSet = static_cast<OpenMPContextSelectorSetKind>( 11231 *std::next(A->ctxSelectorSets_begin(), I)); 11232 auto Ctx = static_cast<OpenMPContextSelectorKind>( 11233 *std::next(A->ctxSelectors_begin(), I)); 11234 Data.back().CtxSet = CtxSet; 11235 Data.back().Ctx = Ctx; 11236 const Expr *Score = *std::next(A->scores_begin(), I); 11237 Data.back().Score = Score->EvaluateKnownConstInt(C); 11238 switch (Ctx) { 11239 case OMP_CTX_vendor: 11240 assert(CtxSet == OMP_CTX_SET_implementation && 11241 "Expected implementation context selector set."); 11242 Data.back().Names = 11243 llvm::makeArrayRef(A->implVendors_begin(), A->implVendors_end()); 11244 break; 11245 case OMP_CTX_kind: 11246 assert(CtxSet == OMP_CTX_SET_device && 11247 "Expected device context selector set."); 11248 Data.back().Names = 11249 llvm::makeArrayRef(A->deviceKinds_begin(), A->deviceKinds_end()); 11250 break; 11251 case OMP_CTX_unknown: 11252 llvm_unreachable("Unknown context selector kind."); 11253 } 11254 } 11255 return Data; 11256 } 11257 11258 static bool isStrictSubset(const CompleteOMPContextSelectorData &LHS, 11259 const CompleteOMPContextSelectorData &RHS) { 11260 llvm::SmallDenseMap<std::pair<int, int>, llvm::StringSet<>, 4> RHSData; 11261 for (const OMPContextSelectorData &D : RHS) { 11262 auto &Pair = RHSData.FindAndConstruct(std::make_pair(D.CtxSet, D.Ctx)); 11263 Pair.getSecond().insert(D.Names.begin(), D.Names.end()); 11264 } 11265 bool AllSetsAreEqual = true; 11266 for (const OMPContextSelectorData &D : LHS) { 11267 auto It = RHSData.find(std::make_pair(D.CtxSet, D.Ctx)); 11268 if (It == RHSData.end()) 11269 return false; 11270 if (D.Names.size() > It->getSecond().size()) 11271 return false; 11272 if (llvm::set_union(It->getSecond(), D.Names)) 11273 return false; 11274 AllSetsAreEqual = 11275 AllSetsAreEqual && (D.Names.size() == 
It->getSecond().size()); 11276 } 11277 11278 return LHS.size() != RHS.size() || !AllSetsAreEqual; 11279 } 11280 11281 static bool greaterCtxScore(const CompleteOMPContextSelectorData &LHS, 11282 const CompleteOMPContextSelectorData &RHS) { 11283 // Score is calculated as sum of all scores + 1. 11284 llvm::APSInt LHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); 11285 bool RHSIsSubsetOfLHS = isStrictSubset(RHS, LHS); 11286 if (RHSIsSubsetOfLHS) { 11287 LHSScore = llvm::APSInt::get(0); 11288 } else { 11289 for (const OMPContextSelectorData &Data : LHS) { 11290 if (Data.Score.getBitWidth() > LHSScore.getBitWidth()) { 11291 LHSScore = LHSScore.extend(Data.Score.getBitWidth()) + Data.Score; 11292 } else if (Data.Score.getBitWidth() < LHSScore.getBitWidth()) { 11293 LHSScore += Data.Score.extend(LHSScore.getBitWidth()); 11294 } else { 11295 LHSScore += Data.Score; 11296 } 11297 } 11298 } 11299 llvm::APSInt RHSScore(llvm::APInt(64, 1), /*isUnsigned=*/false); 11300 if (!RHSIsSubsetOfLHS && isStrictSubset(LHS, RHS)) { 11301 RHSScore = llvm::APSInt::get(0); 11302 } else { 11303 for (const OMPContextSelectorData &Data : RHS) { 11304 if (Data.Score.getBitWidth() > RHSScore.getBitWidth()) { 11305 RHSScore = RHSScore.extend(Data.Score.getBitWidth()) + Data.Score; 11306 } else if (Data.Score.getBitWidth() < RHSScore.getBitWidth()) { 11307 RHSScore += Data.Score.extend(RHSScore.getBitWidth()); 11308 } else { 11309 RHSScore += Data.Score; 11310 } 11311 } 11312 } 11313 return llvm::APSInt::compareValues(LHSScore, RHSScore) >= 0; 11314 } 11315 11316 /// Finds the variant function that matches current context with its context 11317 /// selector. 11318 static const FunctionDecl *getDeclareVariantFunction(CodeGenModule &CGM, 11319 const FunctionDecl *FD) { 11320 if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) 11321 return FD; 11322 // Iterate through all DeclareVariant attributes and check context selectors. 
11323 const OMPDeclareVariantAttr *TopMostAttr = nullptr; 11324 CompleteOMPContextSelectorData TopMostData; 11325 for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { 11326 CompleteOMPContextSelectorData Data = 11327 translateAttrToContextSelectorData(CGM.getContext(), A); 11328 if (!matchesContext(CGM, Data)) 11329 continue; 11330 // If the attribute matches the context, find the attribute with the highest 11331 // score. 11332 if (!TopMostAttr || !greaterCtxScore(TopMostData, Data)) { 11333 TopMostAttr = A; 11334 TopMostData.swap(Data); 11335 } 11336 } 11337 if (!TopMostAttr) 11338 return FD; 11339 return cast<FunctionDecl>( 11340 cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts()) 11341 ->getDecl()); 11342 } 11343 11344 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { 11345 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11346 // If the original function is defined already, use its definition. 11347 StringRef MangledName = CGM.getMangledName(GD); 11348 llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); 11349 if (Orig && !Orig->isDeclaration()) 11350 return false; 11351 const FunctionDecl *NewFD = getDeclareVariantFunction(CGM, D); 11352 // Emit original function if it does not have declare variant attribute or the 11353 // context does not match. 
11354 if (NewFD == D) 11355 return false; 11356 GlobalDecl NewGD = GD.getWithDecl(NewFD); 11357 if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { 11358 DeferredVariantFunction.erase(D); 11359 return true; 11360 } 11361 DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); 11362 return true; 11363 } 11364 11365 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII( 11366 CodeGenModule &CGM, const OMPLoopDirective &S) 11367 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) { 11368 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11369 if (!NeedToPush) 11370 return; 11371 NontemporalDeclsSet &DS = 11372 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back(); 11373 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) { 11374 for (const Stmt *Ref : C->private_refs()) { 11375 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts(); 11376 const ValueDecl *VD; 11377 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) { 11378 VD = DRE->getDecl(); 11379 } else { 11380 const auto *ME = cast<MemberExpr>(SimpleRefExpr); 11381 assert((ME->isImplicitCXXThis() || 11382 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && 11383 "Expected member of current class."); 11384 VD = ME->getMemberDecl(); 11385 } 11386 DS.insert(VD); 11387 } 11388 } 11389 } 11390 11391 CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { 11392 if (!NeedToPush) 11393 return; 11394 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back(); 11395 } 11396 11397 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { 11398 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11399 11400 return llvm::any_of( 11401 CGM.getOpenMPRuntime().NontemporalDeclsStack, 11402 [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); 11403 } 11404 11405 CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII( 11406 CodeGenFunction &CGF, const 
OMPExecutableDirective &S, LValue IVLVal) 11407 : CGM(CGF.CGM), 11408 NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11409 [](const OMPLastprivateClause *C) { 11410 return C->getKind() == 11411 OMPC_LASTPRIVATE_conditional; 11412 })) { 11413 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode."); 11414 if (!NeedToPush) 11415 return; 11416 LastprivateConditionalData &Data = 11417 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back(); 11418 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 11419 if (C->getKind() != OMPC_LASTPRIVATE_conditional) 11420 continue; 11421 11422 for (const Expr *Ref : C->varlists()) { 11423 Data.DeclToUniqeName.try_emplace( 11424 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(), 11425 generateUniqueName(CGM, "pl_cond", Ref)); 11426 } 11427 } 11428 Data.IVLVal = IVLVal; 11429 // In simd only mode or for simd directives no need to generate threadprivate 11430 // references for the loop iteration counter, we can use the original one 11431 // since outlining cannot happen in simd regions. 
11432 if (CGF.getLangOpts().OpenMPSimd || 11433 isOpenMPSimdDirective(S.getDirectiveKind())) { 11434 Data.UseOriginalIV = true; 11435 return; 11436 } 11437 llvm::SmallString<16> Buffer; 11438 llvm::raw_svector_ostream OS(Buffer); 11439 PresumedLoc PLoc = 11440 CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc()); 11441 assert(PLoc.isValid() && "Source location is expected to be always valid."); 11442 11443 llvm::sys::fs::UniqueID ID; 11444 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 11445 CGM.getDiags().Report(diag::err_cannot_open_file) 11446 << PLoc.getFilename() << EC.message(); 11447 OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_" 11448 << PLoc.getLine() << "_" << PLoc.getColumn() << "$iv"; 11449 Data.IVName = OS.str(); 11450 } 11451 11452 CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() { 11453 if (!NeedToPush) 11454 return; 11455 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back(); 11456 } 11457 11458 void CGOpenMPRuntime::initLastprivateConditionalCounter( 11459 CodeGenFunction &CGF, const OMPExecutableDirective &S) { 11460 if (CGM.getLangOpts().OpenMPSimd || 11461 !llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(), 11462 [](const OMPLastprivateClause *C) { 11463 return C->getKind() == OMPC_LASTPRIVATE_conditional; 11464 })) 11465 return; 11466 const CGOpenMPRuntime::LastprivateConditionalData &Data = 11467 LastprivateConditionalStack.back(); 11468 if (Data.UseOriginalIV) 11469 return; 11470 // Global loop counter. Required to handle inner parallel-for regions. 
11471 // global_iv = iv; 11472 Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 11473 CGF, Data.IVLVal.getType(), Data.IVName); 11474 LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, Data.IVLVal.getType()); 11475 llvm::Value *IVVal = CGF.EmitLoadOfScalar(Data.IVLVal, S.getBeginLoc()); 11476 CGF.EmitStoreOfScalar(IVVal, GlobIVLVal); 11477 } 11478 11479 namespace { 11480 /// Checks if the lastprivate conditional variable is referenced in LHS. 11481 class LastprivateConditionalRefChecker final 11482 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11483 CodeGenFunction &CGF; 11484 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11485 const Expr *FoundE = nullptr; 11486 const Decl *FoundD = nullptr; 11487 StringRef UniqueDeclName; 11488 LValue IVLVal; 11489 StringRef IVName; 11490 SourceLocation Loc; 11491 bool UseOriginalIV = false; 11492 11493 public: 11494 bool VisitDeclRefExpr(const DeclRefExpr *E) { 11495 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11496 llvm::reverse(LPM)) { 11497 auto It = D.DeclToUniqeName.find(E->getDecl()); 11498 if (It == D.DeclToUniqeName.end()) 11499 continue; 11500 FoundE = E; 11501 FoundD = E->getDecl()->getCanonicalDecl(); 11502 UniqueDeclName = It->getSecond(); 11503 IVLVal = D.IVLVal; 11504 IVName = D.IVName; 11505 UseOriginalIV = D.UseOriginalIV; 11506 break; 11507 } 11508 return FoundE == E; 11509 } 11510 bool VisitMemberExpr(const MemberExpr *E) { 11511 if (!CGF.IsWrappedCXXThis(E->getBase())) 11512 return false; 11513 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 11514 llvm::reverse(LPM)) { 11515 auto It = D.DeclToUniqeName.find(E->getMemberDecl()); 11516 if (It == D.DeclToUniqeName.end()) 11517 continue; 11518 FoundE = E; 11519 FoundD = E->getMemberDecl()->getCanonicalDecl(); 11520 UniqueDeclName = It->getSecond(); 11521 IVLVal = D.IVLVal; 11522 IVName = D.IVName; 11523 UseOriginalIV = D.UseOriginalIV; 11524 break; 11525 } 11526 return 
FoundE == E; 11527 } 11528 bool VisitStmt(const Stmt *S) { 11529 for (const Stmt *Child : S->children()) { 11530 if (!Child) 11531 continue; 11532 if (const auto *E = dyn_cast<Expr>(Child)) 11533 if (!E->isGLValue()) 11534 continue; 11535 if (Visit(Child)) 11536 return true; 11537 } 11538 return false; 11539 } 11540 explicit LastprivateConditionalRefChecker( 11541 CodeGenFunction &CGF, 11542 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 11543 : CGF(CGF), LPM(LPM) {} 11544 std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool> 11545 getFoundData() const { 11546 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, 11547 UseOriginalIV); 11548 } 11549 }; 11550 } // namespace 11551 11552 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 11553 const Expr *LHS) { 11554 if (CGF.getLangOpts().OpenMP < 50) 11555 return; 11556 LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack); 11557 if (!Checker.Visit(LHS)) 11558 return; 11559 const Expr *FoundE; 11560 const Decl *FoundD; 11561 StringRef UniqueDeclName; 11562 LValue IVLVal; 11563 StringRef IVName; 11564 bool UseOriginalIV; 11565 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) = 11566 Checker.getFoundData(); 11567 11568 // Last updated loop counter for the lastprivate conditional var. 11569 // int<xx> last_iv = 0; 11570 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 11571 llvm::Constant *LastIV = 11572 getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv"); 11573 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 11574 IVLVal.getAlignment().getAsAlign()); 11575 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 11576 11577 // Private address of the lastprivate conditional in the current context. 11578 // priv_a 11579 LValue LVal = CGF.EmitLValue(FoundE); 11580 // Last value of the lastprivate conditional. 
11581 // decltype(priv_a) last_a; 11582 llvm::Constant *Last = getOrCreateInternalVariable( 11583 LVal.getAddress(CGF).getElementType(), UniqueDeclName); 11584 cast<llvm::GlobalVariable>(Last)->setAlignment( 11585 LVal.getAlignment().getAsAlign()); 11586 LValue LastLVal = 11587 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 11588 11589 // Global loop counter. Required to handle inner parallel-for regions. 11590 // global_iv 11591 if (!UseOriginalIV) { 11592 Address IVAddr = 11593 getAddrOfArtificialThreadPrivate(CGF, IVLVal.getType(), IVName); 11594 IVLVal = CGF.MakeAddrLValue(IVAddr, IVLVal.getType()); 11595 } 11596 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc()); 11597 11598 // #pragma omp critical(a) 11599 // if (last_iv <= iv) { 11600 // last_iv = iv; 11601 // last_a = priv_a; 11602 // } 11603 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 11604 FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) { 11605 Action.Enter(CGF); 11606 llvm::Value *LastIVVal = 11607 CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc()); 11608 // (last_iv <= global_iv) ? Check if the variable is updated and store new 11609 // value in global var. 
11610 llvm::Value *CmpRes; 11611 if (IVLVal.getType()->isSignedIntegerType()) { 11612 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 11613 } else { 11614 assert(IVLVal.getType()->isUnsignedIntegerType() && 11615 "Loop iteration variable must be integer."); 11616 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 11617 } 11618 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 11619 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 11620 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 11621 // { 11622 CGF.EmitBlock(ThenBB); 11623 11624 // last_iv = global_iv; 11625 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 11626 11627 // last_a = priv_a; 11628 switch (CGF.getEvaluationKind(LVal.getType())) { 11629 case TEK_Scalar: { 11630 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc()); 11631 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 11632 break; 11633 } 11634 case TEK_Complex: { 11635 CodeGenFunction::ComplexPairTy PrivVal = 11636 CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc()); 11637 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false); 11638 break; 11639 } 11640 case TEK_Aggregate: 11641 llvm_unreachable( 11642 "Aggregates are not supported in lastprivate conditional."); 11643 } 11644 // } 11645 CGF.EmitBranch(ExitBB); 11646 // There is no need to emit line number for unconditional branch. 11647 (void)ApplyDebugLocation::CreateEmpty(CGF); 11648 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 11649 }; 11650 11651 if (CGM.getLangOpts().OpenMPSimd) { 11652 // Do not emit as a critical region as no parallel region could be emitted. 
11653 RegionCodeGenTy ThenRCG(CodeGen); 11654 ThenRCG(CGF); 11655 } else { 11656 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc()); 11657 } 11658 } 11659 11660 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 11661 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 11662 SourceLocation Loc) { 11663 if (CGF.getLangOpts().OpenMP < 50) 11664 return; 11665 auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD); 11666 assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() && 11667 "Unknown lastprivate conditional variable."); 11668 StringRef UniqueName = It->getSecond(); 11669 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 11670 // The variable was not updated in the region - exit. 11671 if (!GV) 11672 return; 11673 LValue LPLVal = CGF.MakeAddrLValue( 11674 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 11675 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 11676 CGF.EmitStoreOfScalar(Res, PrivLVal); 11677 } 11678 11679 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11680 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11681 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11682 llvm_unreachable("Not supported in SIMD-only mode"); 11683 } 11684 11685 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11686 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11687 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11688 llvm_unreachable("Not supported in SIMD-only mode"); 11689 } 11690 11691 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11692 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11693 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11694 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11695 bool Tied, unsigned &NumberOfParts) { 11696 llvm_unreachable("Not supported in SIMD-only mode"); 11697 } 
11698 11699 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11700 SourceLocation Loc, 11701 llvm::Function *OutlinedFn, 11702 ArrayRef<llvm::Value *> CapturedVars, 11703 const Expr *IfCond) { 11704 llvm_unreachable("Not supported in SIMD-only mode"); 11705 } 11706 11707 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11708 CodeGenFunction &CGF, StringRef CriticalName, 11709 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11710 const Expr *Hint) { 11711 llvm_unreachable("Not supported in SIMD-only mode"); 11712 } 11713 11714 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11715 const RegionCodeGenTy &MasterOpGen, 11716 SourceLocation Loc) { 11717 llvm_unreachable("Not supported in SIMD-only mode"); 11718 } 11719 11720 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11721 SourceLocation Loc) { 11722 llvm_unreachable("Not supported in SIMD-only mode"); 11723 } 11724 11725 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11726 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11727 SourceLocation Loc) { 11728 llvm_unreachable("Not supported in SIMD-only mode"); 11729 } 11730 11731 void CGOpenMPSIMDRuntime::emitSingleRegion( 11732 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11733 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11734 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11735 ArrayRef<const Expr *> AssignmentOps) { 11736 llvm_unreachable("Not supported in SIMD-only mode"); 11737 } 11738 11739 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11740 const RegionCodeGenTy &OrderedOpGen, 11741 SourceLocation Loc, 11742 bool IsThreads) { 11743 llvm_unreachable("Not supported in SIMD-only mode"); 11744 } 11745 11746 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11747 SourceLocation Loc, 11748 OpenMPDirectiveKind Kind, 11749 bool EmitChecks, 11750 bool ForceSimpleCall) { 11751 llvm_unreachable("Not supported in SIMD-only 
mode"); 11752 } 11753 11754 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11755 CodeGenFunction &CGF, SourceLocation Loc, 11756 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11757 bool Ordered, const DispatchRTInput &DispatchValues) { 11758 llvm_unreachable("Not supported in SIMD-only mode"); 11759 } 11760 11761 void CGOpenMPSIMDRuntime::emitForStaticInit( 11762 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11763 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11764 llvm_unreachable("Not supported in SIMD-only mode"); 11765 } 11766 11767 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11768 CodeGenFunction &CGF, SourceLocation Loc, 11769 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 11770 llvm_unreachable("Not supported in SIMD-only mode"); 11771 } 11772 11773 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11774 SourceLocation Loc, 11775 unsigned IVSize, 11776 bool IVSigned) { 11777 llvm_unreachable("Not supported in SIMD-only mode"); 11778 } 11779 11780 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11781 SourceLocation Loc, 11782 OpenMPDirectiveKind DKind) { 11783 llvm_unreachable("Not supported in SIMD-only mode"); 11784 } 11785 11786 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11787 SourceLocation Loc, 11788 unsigned IVSize, bool IVSigned, 11789 Address IL, Address LB, 11790 Address UB, Address ST) { 11791 llvm_unreachable("Not supported in SIMD-only mode"); 11792 } 11793 11794 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11795 llvm::Value *NumThreads, 11796 SourceLocation Loc) { 11797 llvm_unreachable("Not supported in SIMD-only mode"); 11798 } 11799 11800 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11801 ProcBindKind ProcBind, 11802 SourceLocation Loc) { 11803 llvm_unreachable("Not supported in SIMD-only mode"); 11804 } 11805 11806 Address 
CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11807 const VarDecl *VD, 11808 Address VDAddr, 11809 SourceLocation Loc) { 11810 llvm_unreachable("Not supported in SIMD-only mode"); 11811 } 11812 11813 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11814 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11815 CodeGenFunction *CGF) { 11816 llvm_unreachable("Not supported in SIMD-only mode"); 11817 } 11818 11819 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11820 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11821 llvm_unreachable("Not supported in SIMD-only mode"); 11822 } 11823 11824 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11825 ArrayRef<const Expr *> Vars, 11826 SourceLocation Loc) { 11827 llvm_unreachable("Not supported in SIMD-only mode"); 11828 } 11829 11830 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11831 const OMPExecutableDirective &D, 11832 llvm::Function *TaskFunction, 11833 QualType SharedsTy, Address Shareds, 11834 const Expr *IfCond, 11835 const OMPTaskDataTy &Data) { 11836 llvm_unreachable("Not supported in SIMD-only mode"); 11837 } 11838 11839 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11840 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11841 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11842 const Expr *IfCond, const OMPTaskDataTy &Data) { 11843 llvm_unreachable("Not supported in SIMD-only mode"); 11844 } 11845 11846 void CGOpenMPSIMDRuntime::emitReduction( 11847 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11848 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11849 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11850 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11851 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11852 ReductionOps, 
Options); 11853 } 11854 11855 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11856 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11857 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11858 llvm_unreachable("Not supported in SIMD-only mode"); 11859 } 11860 11861 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11862 SourceLocation Loc, 11863 ReductionCodeGen &RCG, 11864 unsigned N) { 11865 llvm_unreachable("Not supported in SIMD-only mode"); 11866 } 11867 11868 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11869 SourceLocation Loc, 11870 llvm::Value *ReductionsPtr, 11871 LValue SharedLVal) { 11872 llvm_unreachable("Not supported in SIMD-only mode"); 11873 } 11874 11875 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11876 SourceLocation Loc) { 11877 llvm_unreachable("Not supported in SIMD-only mode"); 11878 } 11879 11880 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11881 CodeGenFunction &CGF, SourceLocation Loc, 11882 OpenMPDirectiveKind CancelRegion) { 11883 llvm_unreachable("Not supported in SIMD-only mode"); 11884 } 11885 11886 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11887 SourceLocation Loc, const Expr *IfCond, 11888 OpenMPDirectiveKind CancelRegion) { 11889 llvm_unreachable("Not supported in SIMD-only mode"); 11890 } 11891 11892 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11893 const OMPExecutableDirective &D, StringRef ParentName, 11894 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11895 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11896 llvm_unreachable("Not supported in SIMD-only mode"); 11897 } 11898 11899 void CGOpenMPSIMDRuntime::emitTargetCall( 11900 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11901 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 11902 const Expr *Device, 11903 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 
11904 const OMPLoopDirective &D)> 11905 SizeEmitter) { 11906 llvm_unreachable("Not supported in SIMD-only mode"); 11907 } 11908 11909 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 11910 llvm_unreachable("Not supported in SIMD-only mode"); 11911 } 11912 11913 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 11914 llvm_unreachable("Not supported in SIMD-only mode"); 11915 } 11916 11917 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 11918 return false; 11919 } 11920 11921 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 11922 const OMPExecutableDirective &D, 11923 SourceLocation Loc, 11924 llvm::Function *OutlinedFn, 11925 ArrayRef<llvm::Value *> CapturedVars) { 11926 llvm_unreachable("Not supported in SIMD-only mode"); 11927 } 11928 11929 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11930 const Expr *NumTeams, 11931 const Expr *ThreadLimit, 11932 SourceLocation Loc) { 11933 llvm_unreachable("Not supported in SIMD-only mode"); 11934 } 11935 11936 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 11937 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11938 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11939 llvm_unreachable("Not supported in SIMD-only mode"); 11940 } 11941 11942 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 11943 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11944 const Expr *Device) { 11945 llvm_unreachable("Not supported in SIMD-only mode"); 11946 } 11947 11948 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11949 const OMPLoopDirective &D, 11950 ArrayRef<Expr *> NumIterations) { 11951 llvm_unreachable("Not supported in SIMD-only mode"); 11952 } 11953 11954 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11955 const OMPDependClause *C) { 11956 llvm_unreachable("Not supported in SIMD-only mode"); 11957 } 11958 11959 const VarDecl * 11960 
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 11961 const VarDecl *NativeParam) const { 11962 llvm_unreachable("Not supported in SIMD-only mode"); 11963 } 11964 11965 Address 11966 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 11967 const VarDecl *NativeParam, 11968 const VarDecl *TargetParam) const { 11969 llvm_unreachable("Not supported in SIMD-only mode"); 11970 } 11971