//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
/// Derived classes describe how a captured statement inside a particular
/// kind of OpenMP region is emitted; the RegionKind tag (together with the
/// CR_OpenMP capture-region kind) drives the LLVM-style classof() RTTI used
/// by the derived classes below.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions that outline a captured statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for inlined regions that have no captured statement of
  /// their own (they delegate to an enclosing region's captures).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit the task-switching point for untied tasks; a no-op for every
  /// region kind except task-outlined regions (see the override below).
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may contain a 'cancel' directive.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Action that emits the resume-switch machinery for untied tasks.
  /// An untied task may be re-entered by any thread after a scheduling
  /// point; each scheduling point stores a part id and returns, and on
  /// re-entry the stored part id is switched on to jump back to the
  /// corresponding resume block.
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        // Case 0: initial entry resumes at the start of the task body.
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        // Store the next part id so a later re-entry resumes after this
        // scheduling point.
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    /// Number of task parts (switch cases) emitted so far.
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter. All queries below
  // delegate to the enclosing (outer) region info when one exists.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null when the enclosing
  /// captured-statement info is not an OpenMP region.
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For this captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

/// Placeholder codegen callback for regions that must never emit a body.
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in a innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(
          VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
/// On construction it installs a CGOpenMPInlinedRegionInfo and stashes the
/// function's lambda/block capture state; the destructor restores everything,
/// so nested emission cannot leak region state into the enclosing function.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel)
      : CGF(CGF) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    LambdaThisCaptureField = CGF.LambdaThisCaptureField;
    CGF.LambdaThisCaptureField = nullptr;
    BlockInfo = CGF.BlockInfo;
    CGF.BlockInfo = nullptr;
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
    CGF.LambdaThisCaptureField = LambdaThisCaptureField;
    CGF.BlockInfo = BlockInfo;
  }
};

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumeric elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file, the
///                            function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the OpenMP runtime entry points emitted by this file.
/// Each enumerator's comment gives the C prototype of the runtime call.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  OMPRTL__kmpc_taskred_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
  // is_ws, int num, void *data);
  OMPRTL__kmpc_taskred_modifier_init,
  // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  OMPRTL__kmpc_task_reduction_modifier_fini,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
  // Call to kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
  // int gtid, kmp_task_t *task);
  OMPRTL__kmpc_task_allow_completion_event,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

/// Run the region codegen callback, entering/exiting the pre|post action
/// around it; the Exit hook is pushed as an EH cleanup so it also runs on
/// exceptional exit from the region.
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  // Pattern-match a reduction op of the form "opaque-callee(...)" where the
  // opaque callee resolves to a DeclRefExpr naming an
  // OMPDeclareReductionDecl; anything else is a built-in reduction.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

/// Emit initialization of the private copy \p Private from \p Original.
/// If \p DRD carries an explicit 'omp declare reduction' initializer, that
/// initializer expression \p InitOp is evaluated with its two arguments bound
/// to \p Private and \p Original; otherwise the private copy is initialized
/// from a zero-filled constant of type \p Ty.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // <combiner, initializer> pair; only the initializer (.second) is used
    // below as the opaque callee of InitOp.
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    // The two call arguments reference the priv/orig variables (each behind a
    // unary operator); privatize those VarDecls to the addresses we were
    // given so evaluating InitOp writes into Private/reads from Original.
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
                            [=]() { return Private; });
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
                            [=]() { return Original; });
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    // No user initializer: materialize a private constant global holding the
    // null value of Ty and copy it into the private storage.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    // Load the constant's value with the evaluation kind matching Ty so it
    // can be stored through EmitAnyExprToMem below.
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate:
      InitRVal = RValue::getAggregate(LV.getAddress(CGF));
      break;
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit true when each element must be initialized
///        via the 'declare reduction' initializer path, false for a plain
///        per-element evaluation of \p Init.
/// \param Init Initial expression of array.
/// \param DRD Associated 'declare reduction' declaration, may be null.
/// \param SrcAddr Address of the original array (only read when \p DRD is
///        non-null, to feed the initializer's omp_orig operand).
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  DestAddr =
      CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes act as the source/destination cursors across loop iterations;
  // the source cursor only exists on the declare-reduction path.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope per element so element-level cleanups run each iteration.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the IR value name says "dest" although this advances the
    // *source* cursor — presumably copied from the block below; harmless
    // since IR names are cosmetic, but worth confirming upstream intent.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

// Emit the lvalue for the shared (original) reduction item.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

// Emit the upper-bound lvalue of an array section; returns an invalid LValue
// for non-section expressions (no upper bound to compute).
LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // Use the declare-reduction initializer when one is declared, or when the
  // private copy has no default init of its own.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ?
ClausesData[N].ReductionOp 984 : PrivateVD->getInit(), 985 DRD, SharedLVal.getAddress(CGF)); 986 } 987 988 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 989 ArrayRef<const Expr *> Origs, 990 ArrayRef<const Expr *> Privates, 991 ArrayRef<const Expr *> ReductionOps) { 992 ClausesData.reserve(Shareds.size()); 993 SharedAddresses.reserve(Shareds.size()); 994 Sizes.reserve(Shareds.size()); 995 BaseDecls.reserve(Shareds.size()); 996 const auto *IOrig = Origs.begin(); 997 const auto *IPriv = Privates.begin(); 998 const auto *IRed = ReductionOps.begin(); 999 for (const Expr *Ref : Shareds) { 1000 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); 1001 std::advance(IOrig, 1); 1002 std::advance(IPriv, 1); 1003 std::advance(IRed, 1); 1004 } 1005 } 1006 1007 void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { 1008 assert(SharedAddresses.size() == N && OrigAddresses.size() == N && 1009 "Number of generated lvalues must be exactly N."); 1010 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); 1011 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); 1012 SharedAddresses.emplace_back(First, Second); 1013 if (ClausesData[N].Shared == ClausesData[N].Ref) { 1014 OrigAddresses.emplace_back(First, Second); 1015 } else { 1016 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 1017 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 1018 OrigAddresses.emplace_back(First, Second); 1019 } 1020 } 1021 1022 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 1023 const auto *PrivateVD = 1024 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1025 QualType PrivateType = PrivateVD->getType(); 1026 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1027 if (!PrivateType->isVariablyModifiedType()) { 1028 Sizes.emplace_back( 1029 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), 1030 nullptr); 1031 return; 1032 } 1033 
llvm::Value *Size; 1034 llvm::Value *SizeInChars; 1035 auto *ElemType = 1036 cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) 1037 ->getElementType(); 1038 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1039 if (AsArraySection) { 1040 Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), 1041 OrigAddresses[N].first.getPointer(CGF)); 1042 Size = CGF.Builder.CreateNUWAdd( 1043 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); 1044 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1045 } else { 1046 SizeInChars = 1047 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); 1048 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1049 } 1050 Sizes.emplace_back(SizeInChars, Size); 1051 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1052 CGF, 1053 cast<OpaqueValueExpr>( 1054 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1055 RValue::get(Size)); 1056 CGF.EmitVariablyModifiedType(PrivateType); 1057 } 1058 1059 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1060 llvm::Value *Size) { 1061 const auto *PrivateVD = 1062 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1063 QualType PrivateType = PrivateVD->getType(); 1064 if (!PrivateType->isVariablyModifiedType()) { 1065 assert(!Size && !Sizes[N].second && 1066 "Size should be nullptr for non-variably modified reduction " 1067 "items."); 1068 return; 1069 } 1070 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1071 CGF, 1072 cast<OpaqueValueExpr>( 1073 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1074 RValue::get(Size)); 1075 CGF.EmitVariablyModifiedType(PrivateType); 1076 } 1077 1078 void ReductionCodeGen::emitInitialization( 1079 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1080 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1081 assert(SharedAddresses.size() > N && "No variable was 
generated"); 1082 const auto *PrivateVD = 1083 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1084 const OMPDeclareReductionDecl *DRD = 1085 getReductionInit(ClausesData[N].ReductionOp); 1086 QualType PrivateType = PrivateVD->getType(); 1087 PrivateAddr = CGF.Builder.CreateElementBitCast( 1088 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1089 QualType SharedType = SharedAddresses[N].first.getType(); 1090 SharedLVal = CGF.MakeAddrLValue( 1091 CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), 1092 CGF.ConvertTypeForMem(SharedType)), 1093 SharedType, SharedAddresses[N].first.getBaseInfo(), 1094 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1095 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1096 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1097 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1098 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1099 PrivateAddr, SharedLVal.getAddress(CGF), 1100 SharedLVal.getType()); 1101 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1102 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1103 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1104 PrivateVD->getType().getQualifiers(), 1105 /*IsInitializer=*/false); 1106 } 1107 } 1108 1109 bool ReductionCodeGen::needCleanups(unsigned N) { 1110 const auto *PrivateVD = 1111 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1112 QualType PrivateType = PrivateVD->getType(); 1113 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1114 return DTorKind != QualType::DK_none; 1115 } 1116 1117 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1118 Address PrivateAddr) { 1119 const auto *PrivateVD = 1120 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1121 QualType PrivateType = PrivateVD->getType(); 1122 QualType::DestructionKind DTorKind = 
PrivateType.isDestructedType(); 1123 if (needCleanups(N)) { 1124 PrivateAddr = CGF.Builder.CreateElementBitCast( 1125 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1126 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1127 } 1128 } 1129 1130 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1131 LValue BaseLV) { 1132 BaseTy = BaseTy.getNonReferenceType(); 1133 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1134 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1135 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1136 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); 1137 } else { 1138 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); 1139 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1140 } 1141 BaseTy = BaseTy->getPointeeType(); 1142 } 1143 return CGF.MakeAddrLValue( 1144 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), 1145 CGF.ConvertTypeForMem(ElTy)), 1146 BaseLV.getType(), BaseLV.getBaseInfo(), 1147 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1148 } 1149 1150 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1151 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1152 llvm::Value *Addr) { 1153 Address Tmp = Address::invalid(); 1154 Address TopTmp = Address::invalid(); 1155 Address MostTopTmp = Address::invalid(); 1156 BaseTy = BaseTy.getNonReferenceType(); 1157 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1158 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1159 Tmp = CGF.CreateMemTemp(BaseTy); 1160 if (TopTmp.isValid()) 1161 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1162 else 1163 MostTopTmp = Tmp; 1164 TopTmp = Tmp; 1165 BaseTy = BaseTy->getPointeeType(); 1166 } 1167 llvm::Type *Ty = BaseLVType; 1168 if (Tmp.isValid()) 1169 Ty = Tmp.getElementType(); 1170 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1171 if (Tmp.isValid()) { 1172 
CGF.Builder.CreateStore(Addr, Tmp); 1173 return MostTopTmp; 1174 } 1175 return Address(Addr, BaseLVAlignment); 1176 } 1177 1178 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1179 const VarDecl *OrigVD = nullptr; 1180 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1181 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1182 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1183 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1184 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1185 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1186 DE = cast<DeclRefExpr>(Base); 1187 OrigVD = cast<VarDecl>(DE->getDecl()); 1188 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1189 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1190 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1191 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1192 DE = cast<DeclRefExpr>(Base); 1193 OrigVD = cast<VarDecl>(DE->getDecl()); 1194 } 1195 return OrigVD; 1196 } 1197 1198 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1199 Address PrivateAddr) { 1200 const DeclRefExpr *DE; 1201 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1202 BaseDecls.emplace_back(OrigVD); 1203 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1204 LValue BaseLValue = 1205 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1206 OriginalBaseLValue); 1207 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1208 BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); 1209 llvm::Value *PrivatePointer = 1210 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1211 PrivateAddr.getPointer(), 1212 SharedAddresses[N].first.getAddress(CGF).getType()); 1213 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1214 return castToBase(CGF, OrigVD->getType(), 1215 SharedAddresses[N].first.getType(), 1216 
OriginalBaseLValue.getAddress(CGF).getType(), 1217 OriginalBaseLValue.getAlignment(), Ptr); 1218 } 1219 BaseDecls.emplace_back( 1220 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1221 return PrivateAddr; 1222 } 1223 1224 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1225 const OMPDeclareReductionDecl *DRD = 1226 getReductionInit(ClausesData[N].ReductionOp); 1227 return DRD && DRD->getInitializer(); 1228 } 1229 1230 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1231 return CGF.EmitLoadOfPointerLValue( 1232 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1233 getThreadIDVariable()->getType()->castAs<PointerType>()); 1234 } 1235 1236 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1237 if (!CGF.HaveInsertPoint()) 1238 return; 1239 // 1.2.2 OpenMP Language Terminology 1240 // Structured block - An executable statement with a single entry at the 1241 // top and a single exit at the bottom. 1242 // The point of exit cannot be a branch out of the structured block. 1243 // longjmp() and throw() must not violate the entry/exit criteria. 
  // Wrap the region body in a terminate scope: an exception escaping the
  // structured block would violate the single-entry/single-exit contract.
  CGF.EHStack.pushTerminate();
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  // For tasks the thread-id variable is a plain local (kmp_int32), not a
  // pointer, so no load-through-pointer is needed here.
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

/// Append a public, non-mutable field of type \p FieldTy to the implicit
/// record \p DC and return the created FieldDecl.
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

// Builds the implicit ident_t record (four kmp_int32 fields plus a void*
// psource) used for the location argument of __kmpc_* calls. The layout must
// stay in sync with the OpenMP runtime's ident_t definition — verify against
// openmp/runtime/src/kmp.h when changing.
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OffloadEntriesInfoManager(CGM) {
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("ident_t");
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  RD->startDefinition();
  // reserved_1
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // flags
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_2
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // reserved_3
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  // psource
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  RD->completeDefinition();
  IdentQTy = C.getRecordType(RD);
  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
  // Type of the name array used for kmp critical sections: 8 x i32.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  loadOffloadInfoMetadata();
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
1296 for (const auto &Data : EmittedNonTargetVariables) { 1297 if (!Data.getValue().pointsToAliveValue()) 1298 continue; 1299 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1300 if (!GV) 1301 continue; 1302 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1303 continue; 1304 GV->eraseFromParent(); 1305 } 1306 } 1307 1308 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1309 SmallString<128> Buffer; 1310 llvm::raw_svector_ostream OS(Buffer); 1311 StringRef Sep = FirstSeparator; 1312 for (StringRef Part : Parts) { 1313 OS << Sep << Part; 1314 Sep = Separator; 1315 } 1316 return std::string(OS.str()); 1317 } 1318 1319 static llvm::Function * 1320 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1321 const Expr *CombinerInitializer, const VarDecl *In, 1322 const VarDecl *Out, bool IsCombiner) { 1323 // void .omp_combiner.(Ty *in, Ty *out); 1324 ASTContext &C = CGM.getContext(); 1325 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1326 FunctionArgList Args; 1327 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1328 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1329 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1330 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1331 Args.push_back(&OmpOutParm); 1332 Args.push_back(&OmpInParm); 1333 const CGFunctionInfo &FnInfo = 1334 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1335 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1336 std::string Name = CGM.getOpenMPRuntime().getName( 1337 {IsCombiner ? 
"omp_combiner" : "omp_initializer", ""}); 1338 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1339 Name, &CGM.getModule()); 1340 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1341 if (CGM.getLangOpts().Optimize) { 1342 Fn->removeFnAttr(llvm::Attribute::NoInline); 1343 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1344 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1345 } 1346 CodeGenFunction CGF(CGM); 1347 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1348 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1349 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1350 Out->getLocation()); 1351 CodeGenFunction::OMPPrivateScope Scope(CGF); 1352 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1353 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1354 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1355 .getAddress(CGF); 1356 }); 1357 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1358 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1359 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1360 .getAddress(CGF); 1361 }); 1362 (void)Scope.Privatize(); 1363 if (!IsCombiner && Out->hasInit() && 1364 !CGF.isTrivialInitializer(Out->getInit())) { 1365 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1366 Out->getType().getQualifiers(), 1367 /*IsInitializer=*/true); 1368 } 1369 if (CombinerInitializer) 1370 CGF.EmitIgnoredExpr(CombinerInitializer); 1371 Scope.ForceCleanup(); 1372 CGF.FinishFunction(); 1373 return Fn; 1374 } 1375 1376 void CGOpenMPRuntime::emitUserDefinedReduction( 1377 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1378 if (UDRMap.count(D) > 0) 1379 return; 1380 llvm::Function *Combiner = emitCombinerOrInitializer( 1381 CGM, D->getType(), D->getCombiner(), 1382 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1383 
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only pass the initializer expression through for call-style
    // ("initializer(omp_priv = ...)" / call) init; otherwise the emitted
    // function relies solely on the priv variable's own initializer.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember which UDRs this function emitted so they can be dropped when
    // the function is destroyed.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

// Return the <combiner, initializer> function pair for \p D, emitting both
// lazily on first request (initializer may be null).
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel)
      : OMPBuilder(OMPBuilder) {
    // No-op when the OpenMPIRBuilder is not in use.
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
1431 // The FiniCB will still be needed but at the point where the 1432 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. 1433 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { 1434 assert(IP.getBlock()->end() == IP.getPoint() && 1435 "Clang CG should cause non-terminated block!"); 1436 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1437 CGF.Builder.restoreIP(IP); 1438 CodeGenFunction::JumpDest Dest = 1439 CGF.getOMPCancelDestination(OMPD_parallel); 1440 CGF.EmitBranchThroughCleanup(Dest); 1441 }; 1442 1443 // TODO: Remove this once we emit parallel regions through the 1444 // OpenMPIRBuilder as it can do this setup internally. 1445 llvm::OpenMPIRBuilder::FinalizationInfo FI( 1446 {FiniCB, OMPD_parallel, HasCancel}); 1447 OMPBuilder->pushFinalizationCB(std::move(FI)); 1448 } 1449 ~PushAndPopStackRAII() { 1450 if (OMPBuilder) 1451 OMPBuilder->popFinalizationCB(); 1452 } 1453 llvm::OpenMPIRBuilder *OMPBuilder; 1454 }; 1455 } // namespace 1456 1457 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1458 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1459 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1460 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1461 assert(ThreadIDVar->getType()->isPointerType() && 1462 "thread id variable must be of type kmp_int32 *"); 1463 CodeGenFunction CGF(CGM, true); 1464 bool HasCancel = false; 1465 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1466 HasCancel = OPD->hasCancel(); 1467 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) 1468 HasCancel = OPD->hasCancel(); 1469 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1470 HasCancel = OPSD->hasCancel(); 1471 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1472 HasCancel = OPFD->hasCancel(); 1473 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1474 HasCancel = 
OPFD->hasCancel(); 1475 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1476 HasCancel = OPFD->hasCancel(); 1477 else if (const auto *OPFD = 1478 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1479 HasCancel = OPFD->hasCancel(); 1480 else if (const auto *OPFD = 1481 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1482 HasCancel = OPFD->hasCancel(); 1483 1484 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new 1485 // parallel region to make cancellation barriers work properly. 1486 llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder(); 1487 PushAndPopStackRAII PSR(OMPBuilder, CGF, HasCancel); 1488 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1489 HasCancel, OutlinedHelperName); 1490 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1491 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); 1492 } 1493 1494 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1495 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1496 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1497 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1498 return emitParallelOrTeamsOutlinedFunction( 1499 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1500 } 1501 1502 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1503 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1504 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1505 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1506 return emitParallelOrTeamsOutlinedFunction( 1507 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1508 } 1509 1510 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1511 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1512 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1513 OpenMPDirectiveKind InnermostKind, const 
RegionCodeGenTy &CodeGen, 1514 bool Tied, unsigned &NumberOfParts) { 1515 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1516 PrePostActionTy &) { 1517 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1518 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1519 llvm::Value *TaskArgs[] = { 1520 UpLoc, ThreadID, 1521 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1522 TaskTVar->getType()->castAs<PointerType>()) 1523 .getPointer(CGF)}; 1524 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1525 }; 1526 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1527 UntiedCodeGen); 1528 CodeGen.setAction(Action); 1529 assert(!ThreadIDVar->getType()->isPointerType() && 1530 "thread id variable must be of type kmp_int32 for tasks"); 1531 const OpenMPDirectiveKind Region = 1532 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1533 : OMPD_task; 1534 const CapturedStmt *CS = D.getCapturedStmt(Region); 1535 bool HasCancel = false; 1536 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) 1537 HasCancel = TD->hasCancel(); 1538 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) 1539 HasCancel = TD->hasCancel(); 1540 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) 1541 HasCancel = TD->hasCancel(); 1542 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) 1543 HasCancel = TD->hasCancel(); 1544 1545 CodeGenFunction CGF(CGM, true); 1546 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1547 InnermostKind, HasCancel, Action); 1548 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1549 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1550 if (!Tied) 1551 NumberOfParts = Action.getNumberOfParts(); 1552 return Res; 1553 } 1554 1555 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1556 const RecordDecl *RD, const CGRecordLayout &RL, 1557 
ArrayRef<llvm::Constant *> Data) { 1558 llvm::StructType *StructTy = RL.getLLVMType(); 1559 unsigned PrevIdx = 0; 1560 ConstantInitBuilder CIBuilder(CGM); 1561 auto DI = Data.begin(); 1562 for (const FieldDecl *FD : RD->fields()) { 1563 unsigned Idx = RL.getLLVMFieldNo(FD); 1564 // Fill the alignment. 1565 for (unsigned I = PrevIdx; I < Idx; ++I) 1566 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1567 PrevIdx = Idx + 1; 1568 Fields.add(*DI); 1569 ++DI; 1570 } 1571 } 1572 1573 template <class... As> 1574 static llvm::GlobalVariable * 1575 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1576 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1577 As &&... Args) { 1578 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1579 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1580 ConstantInitBuilder CIBuilder(CGM); 1581 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1582 buildStructValue(Fields, CGM, RD, RL, Data); 1583 return Fields.finishAndCreateGlobal( 1584 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1585 std::forward<As>(Args)...); 1586 } 1587 1588 template <typename T> 1589 static void 1590 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1591 ArrayRef<llvm::Constant *> Data, 1592 T &Parent) { 1593 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1594 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1595 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1596 buildStructValue(Fields, CGM, RD, RL, Data); 1597 Fields.finishAndAddTo(Parent); 1598 } 1599 1600 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1601 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1602 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1603 FlagsTy FlagsKey(Flags, Reserved2Flags); 1604 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1605 if (!Entry) { 
    if (!DefaultOpenMPPSource) {
      // Initialize default location for psource field of ident_t structure of
      // all ident_t objects. Format is ";file;function;line;column;;".
      // Taken from
      // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
      DefaultOpenMPPSource =
          CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
      DefaultOpenMPPSource =
          llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
    }

    // Field initializers for the default ident_t: a null reserved word, the
    // flags, the reserved-2 flags, another null word, and the default psource
    // string. Padding between fields is handled by createGlobalStruct.
    llvm::Constant *Data[] = {
        llvm::ConstantInt::getNullValue(CGM.Int32Ty),
        llvm::ConstantInt::get(CGM.Int32Ty, Flags),
        llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
        llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
    llvm::GlobalValue *DefaultOpenMPLocation =
        createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
                           llvm::GlobalValue::PrivateLinkage);
    // The location is address-insignificant, so identical ones may be merged.
    DefaultOpenMPLocation->setUnnamedAddr(
        llvm::GlobalValue::UnnamedAddr::Global);

    OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
  }
  return Address(Entry, Align);
}

/// Create the per-function "service" insertion point: a no-op bitcast named
/// "svcpt" that later emission (thread-id call, ident_t memcpy) uses as a
/// stable place to insert instructions. With \p AtCurrentPoint the marker is
/// appended to the builder's current block; otherwise it is placed right
/// after the alloca insertion point in the entry block.
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

/// Remove the service insertion point marker for \p CGF's function, if one
/// was created, and erase the dummy instruction from the IR.
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
  if (!LocValue.isValid()) {
    // Generate "ident_t .kmpc_loc.addr;"
    Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
    auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
    Elem.second.DebugLoc = AI.getPointer();
    LocValue = AI;

    // Initialize the local copy from the cached default ident_t at the
    // service insertion point so it dominates all uses in the function.
    if (!Elem.second.ServiceInsertPt)
      setLocThreadIdInsertPt(CGF);
    CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
    CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
    CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
                             CGF.getTypeSize(IdentQTy));
  }

  // char **psource = &.kmpc_loc_<flags>.addr.psource;
  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
  LValue PSource =
      CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));

  // Debug-location strings are cached per source location (raw encoding).
  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
  if (OMPDebugLoc == nullptr) {
    SmallString<128> Buffer2;
    llvm::raw_svector_ostream OS2(Buffer2);
    // Build debug location
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    OS2 << ";" << PLoc.getFilename() << ";";
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      OS2 << FD->getQualifiedNameAsString();
    OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
    OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
    OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
  }
  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);

  // Our callers always pass this to a runtime function, so for
  // convenience, go ahead and return a naked pointer.
  return LocValue.getPointer();
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the parameter when no landing pad can intervene, or
      // when the address is computed in the entry/current block (so the load
      // is known to be reachable without unwinding through it).
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
1763 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1764 if (!Elem.second.ServiceInsertPt) 1765 setLocThreadIdInsertPt(CGF); 1766 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1767 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1768 llvm::CallInst *Call = CGF.Builder.CreateCall( 1769 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1770 emitUpdateLocation(CGF, Loc)); 1771 Call->setCallingConv(CGF.getRuntimeCC()); 1772 Elem.second.ThreadID = Call; 1773 return Call; 1774 } 1775 1776 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1777 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1778 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1779 clearLocThreadIdInsertPt(CGF); 1780 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1781 } 1782 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1783 for(const auto *D : FunctionUDRMap[CGF.CurFn]) 1784 UDRMap.erase(D); 1785 FunctionUDRMap.erase(CGF.CurFn); 1786 } 1787 auto I = FunctionUDMMap.find(CGF.CurFn); 1788 if (I != FunctionUDMMap.end()) { 1789 for(const auto *D : I->second) 1790 UDMMap.erase(D); 1791 FunctionUDMMap.erase(I); 1792 } 1793 LastprivateConditionalToTypes.erase(CGF.CurFn); 1794 } 1795 1796 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1797 return IdentTy->getPointerTo(); 1798 } 1799 1800 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1801 if (!Kmpc_MicroTy) { 1802 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1803 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1804 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1805 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1806 } 1807 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1808 } 1809 1810 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1811 llvm::FunctionCallee RTLFn = nullptr; 1812 switch (static_cast<OpenMPRTLFunction>(Function)) { 1813 case OMPRTL__kmpc_fork_call: { 1814 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1815 // microtask, ...); 1816 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1817 getKmpc_MicroPointerTy()}; 1818 auto *FnTy = 1819 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1820 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1821 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1822 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1823 llvm::LLVMContext &Ctx = F->getContext(); 1824 llvm::MDBuilder MDB(Ctx); 1825 // Annotate the callback behavior of the __kmpc_fork_call: 1826 // - The callback callee is argument number 2 (microtask). 1827 // - The first two arguments of the callback callee are unknown (-1). 1828 // - All variadic arguments to the __kmpc_fork_call are passed to the 1829 // callback callee. 
1830 F->addMetadata( 1831 llvm::LLVMContext::MD_callback, 1832 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1833 2, {-1, -1}, 1834 /* VarArgsArePassed */ true)})); 1835 } 1836 } 1837 break; 1838 } 1839 case OMPRTL__kmpc_global_thread_num: { 1840 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1841 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1842 auto *FnTy = 1843 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1844 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1845 break; 1846 } 1847 case OMPRTL__kmpc_threadprivate_cached: { 1848 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1849 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1850 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1851 CGM.VoidPtrTy, CGM.SizeTy, 1852 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1853 auto *FnTy = 1854 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1855 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1856 break; 1857 } 1858 case OMPRTL__kmpc_critical: { 1859 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1860 // kmp_critical_name *crit); 1861 llvm::Type *TypeParams[] = { 1862 getIdentTyPointerTy(), CGM.Int32Ty, 1863 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1864 auto *FnTy = 1865 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1866 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1867 break; 1868 } 1869 case OMPRTL__kmpc_critical_with_hint: { 1870 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1871 // kmp_critical_name *crit, uintptr_t hint); 1872 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1873 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1874 CGM.IntPtrTy}; 1875 auto *FnTy = 1876 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1877 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1878 break; 1879 } 1880 case OMPRTL__kmpc_threadprivate_register: { 1881 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1882 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1883 // typedef void *(*kmpc_ctor)(void *); 1884 auto *KmpcCtorTy = 1885 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1886 /*isVarArg*/ false)->getPointerTo(); 1887 // typedef void *(*kmpc_cctor)(void *, void *); 1888 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1889 auto *KmpcCopyCtorTy = 1890 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1891 /*isVarArg*/ false) 1892 ->getPointerTo(); 1893 // typedef void (*kmpc_dtor)(void *); 1894 auto *KmpcDtorTy = 1895 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1896 ->getPointerTo(); 1897 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1898 KmpcCopyCtorTy, KmpcDtorTy}; 1899 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1900 /*isVarArg*/ false); 1901 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1902 break; 1903 } 1904 case OMPRTL__kmpc_end_critical: { 1905 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1906 // kmp_critical_name *crit); 1907 llvm::Type *TypeParams[] = { 1908 getIdentTyPointerTy(), CGM.Int32Ty, 1909 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1910 auto *FnTy = 1911 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1912 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_cancel_barrier: { 1916 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1917 // global_tid); 1918 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1919 auto *FnTy = 1920 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1921 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1922 break; 1923 } 1924 case 
OMPRTL__kmpc_barrier: { 1925 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1926 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1927 auto *FnTy = 1928 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1929 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1930 break; 1931 } 1932 case OMPRTL__kmpc_for_static_fini: { 1933 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1934 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1935 auto *FnTy = 1936 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1937 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1938 break; 1939 } 1940 case OMPRTL__kmpc_push_num_threads: { 1941 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1942 // kmp_int32 num_threads) 1943 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1944 CGM.Int32Ty}; 1945 auto *FnTy = 1946 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1947 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1948 break; 1949 } 1950 case OMPRTL__kmpc_serialized_parallel: { 1951 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1952 // global_tid); 1953 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1954 auto *FnTy = 1955 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1956 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1957 break; 1958 } 1959 case OMPRTL__kmpc_end_serialized_parallel: { 1960 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1961 // global_tid); 1962 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1963 auto *FnTy = 1964 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1965 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1966 break; 1967 } 1968 case OMPRTL__kmpc_flush: { 1969 // Build void 
__kmpc_flush(ident_t *loc); 1970 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1971 auto *FnTy = 1972 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1973 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1974 break; 1975 } 1976 case OMPRTL__kmpc_master: { 1977 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1978 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1979 auto *FnTy = 1980 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1981 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1982 break; 1983 } 1984 case OMPRTL__kmpc_end_master: { 1985 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1986 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1987 auto *FnTy = 1988 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1989 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1990 break; 1991 } 1992 case OMPRTL__kmpc_omp_taskyield: { 1993 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1994 // int end_part); 1995 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1996 auto *FnTy = 1997 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1998 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1999 break; 2000 } 2001 case OMPRTL__kmpc_single: { 2002 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 2003 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2004 auto *FnTy = 2005 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2006 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 2007 break; 2008 } 2009 case OMPRTL__kmpc_end_single: { 2010 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 2011 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2012 auto *FnTy = 2013 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2014 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 2015 break; 2016 } 2017 case OMPRTL__kmpc_omp_task_alloc: { 2018 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 2019 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2020 // kmp_routine_entry_t *task_entry); 2021 assert(KmpRoutineEntryPtrTy != nullptr && 2022 "Type kmp_routine_entry_t must be created."); 2023 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2024 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 2025 // Return void * and then cast to particular kmp_task_t type. 2026 auto *FnTy = 2027 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2028 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 2029 break; 2030 } 2031 case OMPRTL__kmpc_omp_target_task_alloc: { 2032 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 2033 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 2034 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2035 assert(KmpRoutineEntryPtrTy != nullptr && 2036 "Type kmp_routine_entry_t must be created."); 2037 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2038 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2039 CGM.Int64Ty}; 2040 // Return void * and then cast to particular kmp_task_t type. 
2041 auto *FnTy = 2042 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2043 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2044 break; 2045 } 2046 case OMPRTL__kmpc_omp_task: { 2047 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2048 // *new_task); 2049 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2050 CGM.VoidPtrTy}; 2051 auto *FnTy = 2052 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2053 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2054 break; 2055 } 2056 case OMPRTL__kmpc_copyprivate: { 2057 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2058 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2059 // kmp_int32 didit); 2060 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2061 auto *CpyFnTy = 2062 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2063 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2064 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2065 CGM.Int32Ty}; 2066 auto *FnTy = 2067 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2068 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2069 break; 2070 } 2071 case OMPRTL__kmpc_reduce: { 2072 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2073 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2074 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2075 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2076 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2077 /*isVarArg=*/false); 2078 llvm::Type *TypeParams[] = { 2079 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2080 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2081 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2082 auto *FnTy = 2083 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2084 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2085 break; 2086 } 2087 case OMPRTL__kmpc_reduce_nowait: { 2088 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2089 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2090 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2091 // *lck); 2092 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2093 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2094 /*isVarArg=*/false); 2095 llvm::Type *TypeParams[] = { 2096 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2097 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2098 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2099 auto *FnTy = 2100 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2101 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2102 break; 2103 } 2104 case OMPRTL__kmpc_end_reduce: { 2105 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2106 // kmp_critical_name *lck); 2107 llvm::Type *TypeParams[] = { 2108 getIdentTyPointerTy(), CGM.Int32Ty, 2109 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2110 auto *FnTy = 2111 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2112 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2113 break; 2114 } 2115 case OMPRTL__kmpc_end_reduce_nowait: { 2116 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2117 // kmp_critical_name *lck); 2118 llvm::Type *TypeParams[] = { 2119 getIdentTyPointerTy(), CGM.Int32Ty, 2120 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2121 auto *FnTy = 2122 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2123 RTLFn = 2124 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2125 break; 2126 } 2127 case OMPRTL__kmpc_omp_task_begin_if0: { 
2128 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2129 // *new_task); 2130 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2131 CGM.VoidPtrTy}; 2132 auto *FnTy = 2133 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2134 RTLFn = 2135 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2136 break; 2137 } 2138 case OMPRTL__kmpc_omp_task_complete_if0: { 2139 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2140 // *new_task); 2141 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2142 CGM.VoidPtrTy}; 2143 auto *FnTy = 2144 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2145 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2146 /*Name=*/"__kmpc_omp_task_complete_if0"); 2147 break; 2148 } 2149 case OMPRTL__kmpc_ordered: { 2150 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2151 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2152 auto *FnTy = 2153 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2154 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2155 break; 2156 } 2157 case OMPRTL__kmpc_end_ordered: { 2158 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2159 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2160 auto *FnTy = 2161 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2162 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2163 break; 2164 } 2165 case OMPRTL__kmpc_omp_taskwait: { 2166 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2167 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2168 auto *FnTy = 2169 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2170 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2171 break; 2172 } 2173 case OMPRTL__kmpc_taskgroup: { 2174 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2175 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2176 auto *FnTy = 2177 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2178 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2179 break; 2180 } 2181 case OMPRTL__kmpc_end_taskgroup: { 2182 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2183 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2184 auto *FnTy = 2185 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2186 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2187 break; 2188 } 2189 case OMPRTL__kmpc_push_proc_bind: { 2190 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2191 // int proc_bind) 2192 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2193 auto *FnTy = 2194 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2195 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2196 break; 2197 } 2198 case OMPRTL__kmpc_omp_task_with_deps: { 2199 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2200 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2201 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2202 llvm::Type *TypeParams[] = { 2203 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2204 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2205 auto *FnTy = 2206 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2207 RTLFn = 2208 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2209 break; 2210 } 2211 case OMPRTL__kmpc_omp_wait_deps: { 2212 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2213 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2214 // kmp_depend_info_t *noalias_dep_list); 2215 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2216 CGM.Int32Ty, CGM.VoidPtrTy, 2217 
CGM.Int32Ty, CGM.VoidPtrTy}; 2218 auto *FnTy = 2219 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2220 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2221 break; 2222 } 2223 case OMPRTL__kmpc_cancellationpoint: { 2224 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2225 // global_tid, kmp_int32 cncl_kind) 2226 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2227 auto *FnTy = 2228 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2229 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2230 break; 2231 } 2232 case OMPRTL__kmpc_cancel: { 2233 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2234 // kmp_int32 cncl_kind) 2235 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2236 auto *FnTy = 2237 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2238 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2239 break; 2240 } 2241 case OMPRTL__kmpc_push_num_teams: { 2242 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2243 // kmp_int32 num_teams, kmp_int32 num_threads) 2244 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2245 CGM.Int32Ty}; 2246 auto *FnTy = 2247 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2248 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2249 break; 2250 } 2251 case OMPRTL__kmpc_fork_teams: { 2252 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2253 // microtask, ...); 2254 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2255 getKmpc_MicroPointerTy()}; 2256 auto *FnTy = 2257 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2258 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2259 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2260 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
      llvm::LLVMContext &Ctx = F->getContext();
      llvm::MDBuilder MDB(Ctx);
      // Annotate the callback behavior of the __kmpc_fork_teams:
      // - The callback callee is argument number 2 (microtask).
      // - The first two arguments of the callback callee are unknown (-1).
      // - All variadic arguments to the __kmpc_fork_teams are passed to the
      // callback callee.
      F->addMetadata(
          llvm::LLVMContext::MD_callback,
          *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                       2, {-1, -1},
                                       /* VarArgsArePassed */ true)}));
    }
  }
  break;
}
case OMPRTL__kmpc_taskloop: {
  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                              CGM.IntTy,
                              CGM.VoidPtrTy,
                              CGM.IntTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty,
                              CGM.IntTy,
                              CGM.IntTy,
                              CGM.Int64Ty,
                              CGM.VoidPtrTy};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
  break;
}
case OMPRTL__kmpc_doacross_init: {
  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
                              CGM.Int32Ty,
                              CGM.Int32Ty,
                              CGM.VoidPtrTy};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
  break;
}
case OMPRTL__kmpc_doacross_fini: {
  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
  break;
}
case OMPRTL__kmpc_doacross_post: {
  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                              CGM.Int64Ty->getPointerTo()};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
  break;
}
case OMPRTL__kmpc_doacross_wait: {
  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
                              CGM.Int64Ty->getPointerTo()};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
  break;
}
case OMPRTL__kmpc_taskred_init: {
  // Build void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskred_init");
  break;
}
case OMPRTL__kmpc_task_reduction_get_th_data: {
  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(
      FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
  break;
}
case OMPRTL__kmpc_taskred_modifier_init: {
  // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
  // is_ws, int num_data, void *data);
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy,
                              CGM.IntTy, CGM.VoidPtrTy};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy,
                                    /*Name=*/"__kmpc_taskred_modifier_init");
  break;
}
case OMPRTL__kmpc_task_reduction_modifier_fini: {
  // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(
      FnTy,
      /*Name=*/"__kmpc_task_reduction_modifier_fini");
  break;
}
case OMPRTL__kmpc_alloc: {
  // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
  // al); omp_allocator_handle_t type is void *.
  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
  break;
}
case OMPRTL__kmpc_free: {
  // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
  // al); omp_allocator_handle_t type is void *.
  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
  break;
}
case OMPRTL__kmpc_push_target_tripcount: {
  // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
  llvm::FunctionType *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
  break;
}
case OMPRTL__tgt_target: {
  // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  llvm::Type *TypeParams[] = {CGM.Int64Ty,
                              CGM.VoidPtrTy,
                              CGM.Int32Ty,
                              CGM.VoidPtrPtrTy,
                              CGM.VoidPtrPtrTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo()};
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
  break;
}
case OMPRTL__tgt_target_nowait: {
  // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
  // int64_t *arg_types);
  llvm::Type *TypeParams[] = {CGM.Int64Ty,
                              CGM.VoidPtrTy,
                              CGM.Int32Ty,
                              CGM.VoidPtrPtrTy,
                              CGM.VoidPtrPtrTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo()};
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
  break;
}
case OMPRTL__tgt_target_teams: {
  // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  llvm::Type *TypeParams[] = {CGM.Int64Ty,
                              CGM.VoidPtrTy,
                              CGM.Int32Ty,
                              CGM.VoidPtrPtrTy,
                              CGM.VoidPtrPtrTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int32Ty,
                              CGM.Int32Ty};
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
  break;
}
case OMPRTL__tgt_target_teams_nowait: {
  // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  llvm::Type *TypeParams[] = {CGM.Int64Ty,
                              CGM.VoidPtrTy,
                              CGM.Int32Ty,
                              CGM.VoidPtrPtrTy,
                              CGM.VoidPtrPtrTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int32Ty,
                              CGM.Int32Ty};
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
  break;
}
case OMPRTL__tgt_register_requires: {
  // Build void __tgt_register_requires(int64_t flags);
  llvm::Type *TypeParams[] = {CGM.Int64Ty};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
  break;
}
case OMPRTL__tgt_target_data_begin: {
  // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  llvm::Type *TypeParams[] = {CGM.Int64Ty,
                              CGM.Int32Ty,
                              CGM.VoidPtrPtrTy,
                              CGM.VoidPtrPtrTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo()};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
  break;
}
case OMPRTL__tgt_target_data_begin_nowait: {
  // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  llvm::Type *TypeParams[] = {CGM.Int64Ty,
                              CGM.Int32Ty,
                              CGM.VoidPtrPtrTy,
                              CGM.VoidPtrPtrTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo()};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
  break;
}
case OMPRTL__tgt_target_data_end: {
  // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  llvm::Type *TypeParams[] = {CGM.Int64Ty,
                              CGM.Int32Ty,
                              CGM.VoidPtrPtrTy,
                              CGM.VoidPtrPtrTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo()};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
  break;
}
case OMPRTL__tgt_target_data_end_nowait: {
  // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  llvm::Type *TypeParams[] = {CGM.Int64Ty,
                              CGM.Int32Ty,
                              CGM.VoidPtrPtrTy,
                              CGM.VoidPtrPtrTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo()};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
  break;
}
case OMPRTL__tgt_target_data_update: {
  // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  llvm::Type *TypeParams[] = {CGM.Int64Ty,
                              CGM.Int32Ty,
                              CGM.VoidPtrPtrTy,
                              CGM.VoidPtrPtrTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo()};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
  break;
}
case OMPRTL__tgt_target_data_update_nowait: {
  // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  llvm::Type *TypeParams[] = {CGM.Int64Ty,
                              CGM.Int32Ty,
                              CGM.VoidPtrPtrTy,
                              CGM.VoidPtrPtrTy,
                              CGM.Int64Ty->getPointerTo(),
                              CGM.Int64Ty->getPointerTo()};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
  break;
}
case OMPRTL__tgt_mapper_num_components: {
  // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  llvm::Type *TypeParams[] = {CGM.VoidPtrTy};
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components");
  break;
}
case OMPRTL__tgt_push_mapper_component: {
  // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy,
                              CGM.Int64Ty, CGM.Int64Ty};
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component");
  break;
}
case OMPRTL__kmpc_task_allow_completion_event: {
  // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
  // int gtid, kmp_task_t *task);
  auto *FnTy = llvm::FunctionType::get(
      CGM.VoidPtrTy, {getIdentTyPointerTy(), CGM.IntTy, CGM.VoidPtrTy},
      /*isVarArg=*/false);
  RTLFn =
      CGM.CreateRuntimeFunction(FnTy, "__kmpc_task_allow_completion_event");
  break;
}
}
assert(RTLFn && "Unable to find OpenMP runtime function");
return RTLFn;
}

/// Returns the __kmpc_for_static_init_{4,4u,8,8u} runtime entry whose suffix
/// matches the induction variable size (IVSize) and signedness (IVSigned).
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                            : "__kmpc_for_static_init_4u")
                                : (IVSigned ? "__kmpc_for_static_init_8"
                                            : "__kmpc_for_static_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    CGM.Int32Ty,                               // schedtype
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy,                                     // p_stride
    ITy,                                       // incr
    ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_init_{4,4u,8,8u} runtime entry matching the
/// induction variable size/signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_fini_{4,4u,8,8u} runtime entry matching the
/// induction variable size/signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(), // loc
    CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Returns the __kmpc_dispatch_next_{4,4u,8,8u} runtime entry matching the
/// induction variable size/signedness.
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
    getIdentTyPointerTy(),                     // loc
    CGM.Int32Ty,                               // tid
    llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
    PtrTy,                                     // p_lower
    PtrTy,                                     // p_upper
    PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
    SM.getDiagnostics().Report(diag::err_cannot_open_file)
        << PLoc.getFilename() << EC.message();

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}

/// Returns the address of the "_decl_tgt_ref_ptr" indirection pointer used for
/// declare-target 'link' variables (and 'to' variables under unified shared
/// memory); returns an invalid address otherwise.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables need a per-file unique suffix to avoid
        // name clashes across translation units.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    if (!Ptr) {
      QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
      Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
                                        PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the reference pointer is initialized to the variable's
      // address; on the device it is filled in by the runtime.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

/// Returns (creating on first use) the per-variable "<mangled>.cache." global
/// passed to __kmpc_threadprivate_cached.
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}

/// Returns the address of the current thread's copy of a threadprivate
/// variable, either directly (native TLS) or via __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
                                                       CGM.Int8PtrTy),
                         CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
                         getOrCreateThreadPrivateCache(VD)};
  return Address(CGF.EmitRuntimeCall(
                     createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached),
                     Args),
                 VDAddr.getAlignment());
}

/// Registers the ctor/copy-ctor/dtor for a threadprivate variable with the
/// OpenMP runtime via __kmpc_threadprivate_register.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
}

/// Emits (at most once per mangled name) the constructor/destructor helper
/// functions for a threadprivate variable and registers them with the runtime.
/// Returns the synthesized "__omp_threadprivate_init_" function when no
/// CodeGenFunction is supplied, nullptr otherwise (including the native-TLS
/// case, where no registration is needed).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // The single void* parameter is the destination storage for this
      // thread's copy; the initializer is emitted directly into it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg = Address(ArgVal, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The helper returns the same pointer it received.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
                          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      // No constructor needed: pass a typed null so the runtime skips it.
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      // No destructor needed: pass a typed null so the runtime skips it.
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a standalone global-init function
      // that performs the runtime registration.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

/// Emits, once per mangled name, the offload-entry ctor/dtor for a
/// declare-target variable definition. Returns true iff compiling for the
/// device (the caller then suppresses the host-side emission).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: only a placeholder global is needed so the entry tables on
      // host and device stay in sync.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: placeholder global, mirroring the ctor case above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

/// Returns the address of an "artificial" threadprivate global identified by
/// Name: a plain TLS global where supported, otherwise a per-thread copy
/// obtained through __kmpc_threadprivate_cached.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::Value *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true);
    return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}

/// Emits an if/else on Cond with ThenGen/ElseGen for the two arms, constant-
/// folding away the dead arm when Cond folds to a constant.
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it.  Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

/// Emits code for a 'parallel' region: __kmpc_fork_call with the outlined
/// function when the region runs in parallel, or a serialized call sequence
/// (__kmpc_serialized_parallel / direct call / __kmpc_end_serialized_parallel)
/// when an if-clause condition is false.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
                                                          PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0));
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(
        RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  // Reuse the thread-id variable of the enclosing outlined region if there
  // is one.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  // Otherwise materialize the thread id into an i32 temporary and hand back
  // its address.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

/// Get (or lazily create) a module-internal global variable of type \p Ty
/// named \p Name. An already-cached variable must have the requested type.
llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Zero-initialized common-linkage global so multiple TUs share one copy.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}

/// Return the runtime lock variable for the critical region \p CriticalName.
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    // Call the 'enter' runtime function; in conditional mode only run the
    // region body when the call returns non-zero.
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

/// Emit a 'critical' region guarded by __kmpc_critical[_with_hint] and
/// __kmpc_end_critical using the named region lock.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
  }
  CommonActionTy Action(
      createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
                                 : OMPRTL__kmpc_critical),
      EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

/// Emit a 'master' region: the body runs only when __kmpc_master returns
/// non-zero, followed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

/// Emit a taskyield point, via the OMPIRBuilder when available, otherwise as
/// a direct __kmpc_omp_taskyield call.
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emit a 'taskgroup' region bracketed by __kmpc_taskgroup and
/// __kmpc_end_taskgroup.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  // Re-type the void* element to the variable's memory type, keeping the
  // declared alignment.
  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
  Addr = CGF.Builder.CreateElementBitCast(
      Addr, CGF.ConvertTypeForMem(Var->getType()));
  return Addr;
}

/// Build the copyprivate copy function: void copy_func(void *LHSArg,
/// void *RHSArg) that applies each \p AssignmentOps entry to corresponding
/// elements of the two void*-array arguments.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsType),
              CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

/// Emit a 'single' region, plus the copyprivate broadcast when
/// \p CopyprivateVars is non-empty.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
                        createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    // NOTE(review): SrcExprs is passed into the callee's DestExprs slot and
    // DstExprs into SrcExprs — looks intentional given EmitOMPCopy's operand
    // order, but worth confirming against the copyprivate semantics.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
  }
}

/// Emit an 'ordered' region; only the 'threads' form needs the
/// __kmpc_ordered / __kmpc_end_ordered bracket.
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
                          createRuntimeFunction(OMPRTL__kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

/// Map a directive kind to the ident_t barrier flags used in the location
/// argument of barrier runtime calls.
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

/// Pick the default schedule (and chunk) for a loop directive; doacross
/// loops force schedule(static, 1).
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

/// Emit a barrier: via the OMPIRBuilder when available, otherwise as a
/// __kmpc_barrier or (in cancellable regions) __kmpc_cancel_barrier call
/// with an optional cancellation check.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    CGF.Builder.restoreIP(OMPBuilder->CreateBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
3606 static OpenMPSchedType 3607 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3608 // only static is allowed for dist_schedule 3609 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3610 } 3611 3612 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3613 bool Chunked) const { 3614 OpenMPSchedType Schedule = 3615 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3616 return Schedule == OMP_sch_static; 3617 } 3618 3619 bool CGOpenMPRuntime::isStaticNonchunked( 3620 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3621 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3622 return Schedule == OMP_dist_sch_static; 3623 } 3624 3625 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3626 bool Chunked) const { 3627 OpenMPSchedType Schedule = 3628 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3629 return Schedule == OMP_sch_static_chunked; 3630 } 3631 3632 bool CGOpenMPRuntime::isStaticChunked( 3633 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3634 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3635 return Schedule == OMP_dist_sch_static_chunked; 3636 } 3637 3638 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3639 OpenMPSchedType Schedule = 3640 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3641 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3642 return Schedule != OMP_sch_static; 3643 } 3644 3645 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 3646 OpenMPScheduleClauseModifier M1, 3647 OpenMPScheduleClauseModifier M2) { 3648 int Modifier = 0; 3649 switch (M1) { 3650 case OMPC_SCHEDULE_MODIFIER_monotonic: 3651 Modifier = OMP_sch_modifier_monotonic; 3652 break; 3653 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3654 Modifier = OMP_sch_modifier_nonmonotonic; 
3655 break; 3656 case OMPC_SCHEDULE_MODIFIER_simd: 3657 if (Schedule == OMP_sch_static_chunked) 3658 Schedule = OMP_sch_static_balanced_chunked; 3659 break; 3660 case OMPC_SCHEDULE_MODIFIER_last: 3661 case OMPC_SCHEDULE_MODIFIER_unknown: 3662 break; 3663 } 3664 switch (M2) { 3665 case OMPC_SCHEDULE_MODIFIER_monotonic: 3666 Modifier = OMP_sch_modifier_monotonic; 3667 break; 3668 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3669 Modifier = OMP_sch_modifier_nonmonotonic; 3670 break; 3671 case OMPC_SCHEDULE_MODIFIER_simd: 3672 if (Schedule == OMP_sch_static_chunked) 3673 Schedule = OMP_sch_static_balanced_chunked; 3674 break; 3675 case OMPC_SCHEDULE_MODIFIER_last: 3676 case OMPC_SCHEDULE_MODIFIER_unknown: 3677 break; 3678 } 3679 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 3680 // If the static schedule kind is specified or if the ordered clause is 3681 // specified, and if the nonmonotonic modifier is not specified, the effect is 3682 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 3683 // modifier is specified, the effect is as if the nonmonotonic modifier is 3684 // specified. 
3685 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 3686 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 3687 Schedule == OMP_sch_static_balanced_chunked || 3688 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || 3689 Schedule == OMP_dist_sch_static_chunked || 3690 Schedule == OMP_dist_sch_static)) 3691 Modifier = OMP_sch_modifier_nonmonotonic; 3692 } 3693 return Schedule | Modifier; 3694 } 3695 3696 void CGOpenMPRuntime::emitForDispatchInit( 3697 CodeGenFunction &CGF, SourceLocation Loc, 3698 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3699 bool Ordered, const DispatchRTInput &DispatchValues) { 3700 if (!CGF.HaveInsertPoint()) 3701 return; 3702 OpenMPSchedType Schedule = getRuntimeSchedule( 3703 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3704 assert(Ordered || 3705 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3706 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3707 Schedule != OMP_sch_static_balanced_chunked)); 3708 // Call __kmpc_dispatch_init( 3709 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3710 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3711 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3712 3713 // If the Chunk was not specified in the clause - use default value 1. 3714 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3715 : CGF.Builder.getIntN(IVSize, 1); 3716 llvm::Value *Args[] = { 3717 emitUpdateLocation(CGF, Loc), 3718 getThreadID(CGF, Loc), 3719 CGF.Builder.getInt32(addMonoNonMonoModifier( 3720 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3721 DispatchValues.LB, // Lower 3722 DispatchValues.UB, // Upper 3723 CGF.Builder.getIntN(IVSize, 1), // Stride 3724 Chunk // Chunk 3725 }; 3726 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3727 } 3728 3729 static void emitForStaticInitCall( 3730 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3731 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3732 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3733 const CGOpenMPRuntime::StaticRTInput &Values) { 3734 if (!CGF.HaveInsertPoint()) 3735 return; 3736 3737 assert(!Values.Ordered); 3738 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3739 Schedule == OMP_sch_static_balanced_chunked || 3740 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3741 Schedule == OMP_dist_sch_static || 3742 Schedule == OMP_dist_sch_static_chunked); 3743 3744 // Call __kmpc_for_static_init( 3745 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3746 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3747 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3748 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3749 llvm::Value *Chunk = Values.Chunk; 3750 if (Chunk == nullptr) { 3751 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3752 Schedule == OMP_dist_sch_static) && 3753 "expected static non-chunked schedule"); 3754 // If the Chunk was not specified in the clause - use default value 1. 
3755 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3756 } else { 3757 assert((Schedule == OMP_sch_static_chunked || 3758 Schedule == OMP_sch_static_balanced_chunked || 3759 Schedule == OMP_ord_static_chunked || 3760 Schedule == OMP_dist_sch_static_chunked) && 3761 "expected static chunked schedule"); 3762 } 3763 llvm::Value *Args[] = { 3764 UpdateLocation, 3765 ThreadId, 3766 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 3767 M2)), // Schedule type 3768 Values.IL.getPointer(), // &isLastIter 3769 Values.LB.getPointer(), // &LB 3770 Values.UB.getPointer(), // &UB 3771 Values.ST.getPointer(), // &Stride 3772 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3773 Chunk // Chunk 3774 }; 3775 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3776 } 3777 3778 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3779 SourceLocation Loc, 3780 OpenMPDirectiveKind DKind, 3781 const OpenMPScheduleTy &ScheduleKind, 3782 const StaticRTInput &Values) { 3783 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3784 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3785 assert(isOpenMPWorksharingDirective(DKind) && 3786 "Expected loop-based or sections-based directive."); 3787 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3788 isOpenMPLoopDirective(DKind) 3789 ? 
OMP_IDENT_WORK_LOOP 3790 : OMP_IDENT_WORK_SECTIONS); 3791 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3792 llvm::FunctionCallee StaticInitFunction = 3793 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3794 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 3795 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3796 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3797 } 3798 3799 void CGOpenMPRuntime::emitDistributeStaticInit( 3800 CodeGenFunction &CGF, SourceLocation Loc, 3801 OpenMPDistScheduleClauseKind SchedKind, 3802 const CGOpenMPRuntime::StaticRTInput &Values) { 3803 OpenMPSchedType ScheduleNum = 3804 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3805 llvm::Value *UpdatedLocation = 3806 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3807 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3808 llvm::FunctionCallee StaticInitFunction = 3809 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3810 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3811 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3812 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3813 } 3814 3815 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3816 SourceLocation Loc, 3817 OpenMPDirectiveKind DKind) { 3818 if (!CGF.HaveInsertPoint()) 3819 return; 3820 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3821 llvm::Value *Args[] = { 3822 emitUpdateLocation(CGF, Loc, 3823 isOpenMPDistributeDirective(DKind) 3824 ? OMP_IDENT_WORK_DISTRIBUTE 3825 : isOpenMPLoopDirective(DKind) 3826 ? 
OMP_IDENT_WORK_LOOP 3827 : OMP_IDENT_WORK_SECTIONS), 3828 getThreadID(CGF, Loc)}; 3829 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 3830 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3831 Args); 3832 } 3833 3834 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3835 SourceLocation Loc, 3836 unsigned IVSize, 3837 bool IVSigned) { 3838 if (!CGF.HaveInsertPoint()) 3839 return; 3840 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3841 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3842 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3843 } 3844 3845 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3846 SourceLocation Loc, unsigned IVSize, 3847 bool IVSigned, Address IL, 3848 Address LB, Address UB, 3849 Address ST) { 3850 // Call __kmpc_dispatch_next( 3851 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3852 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3853 // kmp_int[32|64] *p_stride); 3854 llvm::Value *Args[] = { 3855 emitUpdateLocation(CGF, Loc), 3856 getThreadID(CGF, Loc), 3857 IL.getPointer(), // &isLastIter 3858 LB.getPointer(), // &Lower 3859 UB.getPointer(), // &Upper 3860 ST.getPointer() // &Stride 3861 }; 3862 llvm::Value *Call = 3863 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3864 return CGF.EmitScalarConversion( 3865 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3866 CGF.getContext().BoolTy, Loc); 3867 } 3868 3869 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3870 llvm::Value *NumThreads, 3871 SourceLocation Loc) { 3872 if (!CGF.HaveInsertPoint()) 3873 return; 3874 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3875 llvm::Value *Args[] = { 3876 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3877 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3878 
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3879 Args); 3880 } 3881 3882 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3883 ProcBindKind ProcBind, 3884 SourceLocation Loc) { 3885 if (!CGF.HaveInsertPoint()) 3886 return; 3887 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value."); 3888 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3889 llvm::Value *Args[] = { 3890 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3891 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; 3892 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3893 } 3894 3895 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3896 SourceLocation Loc, llvm::AtomicOrdering AO) { 3897 llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder(); 3898 if (OMPBuilder) { 3899 OMPBuilder->CreateFlush(CGF.Builder); 3900 } else { 3901 if (!CGF.HaveInsertPoint()) 3902 return; 3903 // Build call void __kmpc_flush(ident_t *loc) 3904 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3905 emitUpdateLocation(CGF, Loc)); 3906 } 3907 } 3908 3909 namespace { 3910 /// Indexes of fields for type kmp_task_t. 3911 enum KmpTaskTFields { 3912 /// List of shared variables. 3913 KmpTaskTShareds, 3914 /// Task routine. 3915 KmpTaskTRoutine, 3916 /// Partition id for the untied tasks. 3917 KmpTaskTPartId, 3918 /// Function with call of destructors for private variables. 3919 Data1, 3920 /// Task priority. 3921 Data2, 3922 /// (Taskloops only) Lower bound. 3923 KmpTaskTLowerBound, 3924 /// (Taskloops only) Upper bound. 3925 KmpTaskTUpperBound, 3926 /// (Taskloops only) Stride. 3927 KmpTaskTStride, 3928 /// (Taskloops only) Is last iteration flag. 3929 KmpTaskTLastIter, 3930 /// (Taskloops only) Reduction data. 
  KmpTaskTReductions,
};
} // anonymous namespace

/// Returns true when no target-region or device-global-var offload entries
/// have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
/// Creates a placeholder entry (null address/ID) keyed by
/// DeviceID/FileID/ParentName/LineNum; device-side codegen only.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Registers the address/ID/flags of a target region entry. On the device a
/// matching placeholder must already exist (diagnose otherwise); on the host
/// a fresh entry is created.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Returns true iff an *unregistered* (address/ID still null) target region
/// entry exists for the given key.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

/// Applies \p Action to every recorded target region entry, passing the full
/// key (device, file, parent name, line) along with the entry itself.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Creates a placeholder entry for a declare-target global variable;
/// device-side codegen only.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Registers address/size/linkage of a declare-target global variable. An
/// entry seen again with a zero recorded size gets its size/linkage updated;
/// the address is never reset to a different value (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Applies \p Action to every recorded declare-target global variable entry.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

/// Emits one __tgt_offload_entry global (ID, name string, size, flags,
/// reserved) that describes an offloading entry for the runtime.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
4099 Entry->setSection("omp_offloading_entries"); 4100 } 4101 4102 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4103 // Emit the offloading entries and metadata so that the device codegen side 4104 // can easily figure out what to emit. The produced metadata looks like 4105 // this: 4106 // 4107 // !omp_offload.info = !{!1, ...} 4108 // 4109 // Right now we only generate metadata for function that contain target 4110 // regions. 4111 4112 // If we are in simd mode or there are no entries, we don't need to do 4113 // anything. 4114 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 4115 return; 4116 4117 llvm::Module &M = CGM.getModule(); 4118 llvm::LLVMContext &C = M.getContext(); 4119 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 4120 SourceLocation, StringRef>, 4121 16> 4122 OrderedEntries(OffloadEntriesInfoManager.size()); 4123 llvm::SmallVector<StringRef, 16> ParentFunctions( 4124 OffloadEntriesInfoManager.size()); 4125 4126 // Auxiliary methods to create metadata values and strings. 4127 auto &&GetMDInt = [this](unsigned V) { 4128 return llvm::ConstantAsMetadata::get( 4129 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4130 }; 4131 4132 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4133 4134 // Create the offloading info metadata node. 4135 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4136 4137 // Create function that emits metadata for each target region entry; 4138 auto &&TargetRegionMetadataEmitter = 4139 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 4140 &GetMDString]( 4141 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4142 unsigned Line, 4143 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4144 // Generate metadata for target regions. Each entry of this metadata 4145 // contains: 4146 // - Entry 0 -> Kind of this type of metadata (0). 
4147 // - Entry 1 -> Device ID of the file where the entry was identified. 4148 // - Entry 2 -> File ID of the file where the entry was identified. 4149 // - Entry 3 -> Mangled name of the function where the entry was 4150 // identified. 4151 // - Entry 4 -> Line in the file where the entry was identified. 4152 // - Entry 5 -> Order the entry was created. 4153 // The first element of the metadata node is the kind. 4154 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4155 GetMDInt(FileID), GetMDString(ParentName), 4156 GetMDInt(Line), GetMDInt(E.getOrder())}; 4157 4158 SourceLocation Loc; 4159 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 4160 E = CGM.getContext().getSourceManager().fileinfo_end(); 4161 I != E; ++I) { 4162 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 4163 I->getFirst()->getUniqueID().getFile() == FileID) { 4164 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 4165 I->getFirst(), Line, 1); 4166 break; 4167 } 4168 } 4169 // Save this entry in the right position of the ordered entries array. 4170 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 4171 ParentFunctions[E.getOrder()] = ParentName; 4172 4173 // Add metadata to the named metadata node. 4174 MD->addOperand(llvm::MDNode::get(C, Ops)); 4175 }; 4176 4177 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4178 TargetRegionMetadataEmitter); 4179 4180 // Create function that emits metadata for each device global variable entry; 4181 auto &&DeviceGlobalVarMetadataEmitter = 4182 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4183 MD](StringRef MangledName, 4184 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4185 &E) { 4186 // Generate metadata for global variables. Each entry of this metadata 4187 // contains: 4188 // - Entry 0 -> Kind of this type of metadata (1). 4189 // - Entry 1 -> Mangled name of the variable. 4190 // - Entry 2 -> Declare target kind. 
4191 // - Entry 3 -> Order the entry was created. 4192 // The first element of the metadata node is the kind. 4193 llvm::Metadata *Ops[] = { 4194 GetMDInt(E.getKind()), GetMDString(MangledName), 4195 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4196 4197 // Save this entry in the right position of the ordered entries array. 4198 OrderedEntries[E.getOrder()] = 4199 std::make_tuple(&E, SourceLocation(), MangledName); 4200 4201 // Add metadata to the named metadata node. 4202 MD->addOperand(llvm::MDNode::get(C, Ops)); 4203 }; 4204 4205 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4206 DeviceGlobalVarMetadataEmitter); 4207 4208 for (const auto &E : OrderedEntries) { 4209 assert(std::get<0>(E) && "All ordered entries must exist!"); 4210 if (const auto *CE = 4211 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4212 std::get<0>(E))) { 4213 if (!CE->getID() || !CE->getAddress()) { 4214 // Do not blame the entry if the parent funtion is not emitted. 4215 StringRef FnName = ParentFunctions[CE->getOrder()]; 4216 if (!CGM.GetGlobalValue(FnName)) 4217 continue; 4218 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4219 DiagnosticsEngine::Error, 4220 "Offloading entry for target region in %0 is incorrect: either the " 4221 "address or the ID is invalid."); 4222 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 4223 continue; 4224 } 4225 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4226 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4227 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 4228 OffloadEntryInfoDeviceGlobalVar>( 4229 std::get<0>(E))) { 4230 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4231 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4232 CE->getFlags()); 4233 switch (Flags) { 4234 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4235 if (CGM.getLangOpts().OpenMPIsDevice && 4236 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4237 continue; 4238 if (!CE->getAddress()) { 4239 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4240 DiagnosticsEngine::Error, "Offloading entry for declare target " 4241 "variable %0 is incorrect: the " 4242 "address is invalid."); 4243 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 4244 continue; 4245 } 4246 // The vaiable has no definition - no need to add the entry. 4247 if (CE->getVarSize().isZero()) 4248 continue; 4249 break; 4250 } 4251 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4252 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4253 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4254 "Declaret target link address is set."); 4255 if (CGM.getLangOpts().OpenMPIsDevice) 4256 continue; 4257 if (!CE->getAddress()) { 4258 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4259 DiagnosticsEngine::Error, 4260 "Offloading entry for declare target variable is incorrect: the " 4261 "address is invalid."); 4262 CGM.getDiags().Report(DiagID); 4263 continue; 4264 } 4265 break; 4266 } 4267 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4268 CE->getVarSize().getQuantity(), Flags, 4269 CE->getLinkage()); 4270 } else { 4271 llvm_unreachable("Unsupported entry kind."); 4272 } 4273 } 4274 } 4275 4276 /// Loads all the offload entries information from the host IR 4277 /// metadata. 4278 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4279 // If we are in target mode, load the metadata from the host IR. This code has 4280 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 

  // Host-side compilation has nothing to load.
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  // Parse the host IR into a throwaway context; only the named metadata is
  // consumed, the module itself is discarded afterwards.
  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
  if (!MD)
    return;

  for (llvm::MDNode *MN : MD->operands()) {
    // Helpers to decode the operands written by
    // createOffloadEntriesAndInfoMetadata().
    auto &&GetMDInt = [MN](unsigned Idx) {
      auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
      return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
    };

    auto &&GetMDString = [MN](unsigned Idx) {
      auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
      return V->getString();
    };

    // Operand 0 is the entry kind; remaining operand layout depends on it.
    switch (GetMDInt(0)) {
    default:
      llvm_unreachable("Unexpected metadata!");
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoTargetRegion:
      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
          /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
          /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
          /*Order=*/GetMDInt(5));
      break;
    case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
        OffloadingEntryInfoDeviceGlobalVar:
      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
          /*MangledName=*/GetMDString(1),
          static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
              /*Flags=*/GetMDInt(2)),
          /*Order=*/GetMDInt(3));
      break;
    }
  }
}

/// Lazily builds the kmp_routine_entry_t function pointer type used for task
/// entry points: kmp_int32 (*)(kmp_int32, void *).
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

/// Lazily builds (and caches) the __tgt_offload_entry record type.
QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
  // Make sure the type of the entry is already created. This is the type we
  // have to create:
  // struct __tgt_offload_entry{
  //   void      *addr;       // Pointer to the offload entry info.
  //                          // (function or global)
  //   char      *name;       // Name of the function or global.
  //   size_t     size;       // Size of the entry info (0 if it a function).
  //   int32_t    flags;      // Flags associated with the entry, e.g. 'link'.
  //   int32_t    reserved;   // Reserved, to use by the runtime library.
4367 // }; 4368 if (TgtOffloadEntryQTy.isNull()) { 4369 ASTContext &C = CGM.getContext(); 4370 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4371 RD->startDefinition(); 4372 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4373 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4374 addFieldToRecordDecl(C, RD, C.getSizeType()); 4375 addFieldToRecordDecl( 4376 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4377 addFieldToRecordDecl( 4378 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4379 RD->completeDefinition(); 4380 RD->addAttr(PackedAttr::CreateImplicit(C)); 4381 TgtOffloadEntryQTy = C.getRecordType(RD); 4382 } 4383 return TgtOffloadEntryQTy; 4384 } 4385 4386 namespace { 4387 struct PrivateHelpersTy { 4388 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, 4389 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) 4390 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), 4391 PrivateElemInit(PrivateElemInit) {} 4392 const Expr *OriginalRef = nullptr; 4393 const VarDecl *Original = nullptr; 4394 const VarDecl *PrivateCopy = nullptr; 4395 const VarDecl *PrivateElemInit = nullptr; 4396 }; 4397 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4398 } // anonymous namespace 4399 4400 static RecordDecl * 4401 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4402 if (!Privates.empty()) { 4403 ASTContext &C = CGM.getContext(); 4404 // Build struct .kmp_privates_t. 
{ 4405 // /* private vars */ 4406 // }; 4407 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4408 RD->startDefinition(); 4409 for (const auto &Pair : Privates) { 4410 const VarDecl *VD = Pair.second.Original; 4411 QualType Type = VD->getType().getNonReferenceType(); 4412 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4413 if (VD->hasAttrs()) { 4414 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4415 E(VD->getAttrs().end()); 4416 I != E; ++I) 4417 FD->addAttr(*I); 4418 } 4419 } 4420 RD->completeDefinition(); 4421 return RD; 4422 } 4423 return nullptr; 4424 } 4425 4426 static RecordDecl * 4427 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4428 QualType KmpInt32Ty, 4429 QualType KmpRoutineEntryPointerQTy) { 4430 ASTContext &C = CGM.getContext(); 4431 // Build struct kmp_task_t { 4432 // void * shareds; 4433 // kmp_routine_entry_t routine; 4434 // kmp_int32 part_id; 4435 // kmp_cmplrdata_t data1; 4436 // kmp_cmplrdata_t data2; 4437 // For taskloops additional fields: 4438 // kmp_uint64 lb; 4439 // kmp_uint64 ub; 4440 // kmp_int64 st; 4441 // kmp_int32 liter; 4442 // void * reductions; 4443 // }; 4444 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4445 UD->startDefinition(); 4446 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4447 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4448 UD->completeDefinition(); 4449 QualType KmpCmplrdataTy = C.getRecordType(UD); 4450 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4451 RD->startDefinition(); 4452 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4453 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4454 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4455 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4456 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4457 if (isOpenMPTaskLoopDirective(Kind)) { 4458 QualType KmpUInt64Ty = 4459 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4460 QualType KmpInt64Ty = 4461 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4462 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4463 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4464 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4465 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4466 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4467 } 4468 RD->completeDefinition(); 4469 return RD; 4470 } 4471 4472 static RecordDecl * 4473 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4474 ArrayRef<PrivateDataTy> Privates) { 4475 ASTContext &C = CGM.getContext(); 4476 // Build struct kmp_task_t_with_privates { 4477 // kmp_task_t task_data; 4478 // .kmp_privates_t. privates; 4479 // }; 4480 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4481 RD->startDefinition(); 4482 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4483 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4484 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4485 RD->completeDefinition(); 4486 return RD; 4487 } 4488 4489 /// Emit a proxy function which accepts kmp_task_t as the second 4490 /// argument. 
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  // The proxy takes (gtid, task struct pointer) per kmp_routine_entry_t.
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // Base points at the embedded kmp_task_t (first field of the record).
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  // The privates record is optional; pass null when the task has no privates.
  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
                               TaskPrivatesMap,
                               CGF.Builder
                                   .CreatePointerBitCastOrAddrSpaceCast(
                                       TDBase.getAddress(CGF), CGF.VoidPtrTy)
                                   .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    // Taskloops additionally forward lb/ub/st/liter/reductions loaded from
    // the trailing kmp_task_t fields.
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  // The proxy always returns 0.
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

/// Emits the task-destructor function that runs the destructors of all
/// destructible fields of the task's privates record.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  // The second field of kmp_task_t_with_privates is the privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    // Push a destroy cleanup for every field whose type needs destruction.
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               ArrayRef<const Expr *> PrivateVars,
                               ArrayRef<const Expr *> FirstprivateVars,
                               ArrayRef<const Expr *> LastprivateVars,
                               QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  // Maps each original variable to the position of its out-parameter in
  // Args (position 0 is the privates-record pointer).
  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    // The mapping function is trivial; force-inline it in optimized builds.
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  // Emits the initializers for the task's private copies into the privates
  // record (the second field of kmp_task_t_with_privates).
  // \param KmpTaskSharedsPtr Address of the task's shareds block (source for
  //        firstprivate copies); may be invalid when there is nothing to copy.
  // \param ForDup true when called from the task_dup function (taskloop
  //        duplication path), false for initial task creation.
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Iterate the privates record's fields in lock-step with Privates; the
  // record was created from the same (sorted) list.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // On the dup path only non-trivial constructor initializers need to be
    // re-run; trivial ones were handled by the byte copy of the task.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: initialize from the original (shared) variable.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value from the src task's shareds block.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else {
          // Initial creation: emit the captured reference inside an inlined
          // OpenMP region so capture lookup resolves correctly.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/class firstprivate: privatize Elem to alias the shared
          // value, then run the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate: default-initialize the copy.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  // Returns true if any private copy has a non-trivial C++ constructor
  // initializer, i.e. the taskloop duplication function must re-run
  // constructors rather than rely on the byte copy of the task.
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}


/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Signature: (kmp_task_t_with_privates *dst, kmp_task_t_with_privates *src,
  //             int lastpriv).
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task's shareds block.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  // The second field of kmp_task_t_with_privates is the privates record;
  // cleanups are needed iff any of its fields has a destructed type.
  bool NeedsCleanup = false;
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  // Emits the task allocation (__kmpc_omp_task_alloc /
  // __kmpc_omp_target_task_alloc), the proxy task entry, the privates
  // mapping/dup/destructor helpers as needed, and the initialization of the
  // resulting kmp_task_t record. Returns the pieces the caller needs to emit
  // the actual task launch.
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Sort by decreasing alignment to minimize padding in the privates record;
  // stable so equal-alignment entries keep source order.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop and regular tasks use
  // different cached record types (taskloop kmp_task_t carries extra fields).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The privates-map parameter is the 4th parameter of the outlined task
  // function; take its type from there so the cast below always matches.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // The final flag may be a compile-time constant (the Int side of the
  // PointerIntPair) or a runtime value (the Pointer side) requiring a select.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a dup function when lastprivates exist or
    // any private needs a non-trivial (re)initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

namespace {
/// Dependence kind for RTL.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = DepMutexInOutSet;
    break;
  // source/sink (doacross) and depobj kinds are handled elsewhere and must
  // never reach this translation.
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  // flags is an unsigned integer with the bit-width of bool.
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    // struct kmp_depend_info { intptr_t base_addr; size_t len; flags_t flags; }
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  // Returns {number of dependencies, lvalue of the first element} for a
  // depobj. The element count is stored in the base_addr field of the
  // kmp_depend_info slot located immediately *before* the array (GEP -1).
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one kmp_depend_info element to the hidden size slot.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
      Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  // Opens one loop nest per declared iterator in the constructor; the
  // destructor emits the matching increments and closes the loops, so code
  // emitted while the scope is alive runs once per iteration.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      // Evaluate the upper bounds once, before entering the loops.
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Pick a signed or unsigned compare to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close the loop nests innermost-first, mirroring construction order.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

/// Fills DependenciesArray with one kmp_depend_info entry per dependency
/// expression in \p Data, iterating any attached iterator expression. \p Pos
/// is either a compile-time index (unsigned*) or a runtime index lvalue and
/// is advanced past the emitted entries.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // If an iterator modifier is present, everything below runs once per
  // iteration of the generated loop nest.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
    llvm::Value *Addr;
    if (OASE) {
      const Expr *Base = OASE->getBase();
      Addr = CGF.EmitScalarExpr(Base);
    } else {
      Addr = CGF.EmitLValue(E).getPointer(CGF);
    }
    llvm::Value *Size;
    QualType Ty = E->getType();
    if (OASE) {
      // Shaped expression: size = sizeof(element) * product of dimensions.
      Size = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OASE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
    } else if (const auto *ASE =
                   dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
      // Array section: size = (one-past-upper-bound) - lower-bound, in bytes.
      LValue UpAddrLVal =
          CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
      llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
          UpAddrLVal.getPointer(CGF), /*Idx0=*/1);
      llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGM.SizeTy);
      llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
      Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
    } else {
      Size = CGF.getTypeSize(Ty);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx),
DependenciesArray.getAlignment()), 5432 KmpDependInfoTy); 5433 } 5434 // deps[i].base_addr = &<Dependencies[i].second>; 5435 LValue BaseAddrLVal = CGF.EmitLValueForField( 5436 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5437 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), 5438 BaseAddrLVal); 5439 // deps[i].len = sizeof(<Dependencies[i].second>); 5440 LValue LenLVal = CGF.EmitLValueForField( 5441 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5442 CGF.EmitStoreOfScalar(Size, LenLVal); 5443 // deps[i].flags = <Dependencies[i].first>; 5444 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); 5445 LValue FlagsLVal = CGF.EmitLValueForField( 5446 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5447 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5448 FlagsLVal); 5449 if (unsigned *P = Pos.dyn_cast<unsigned *>()) { 5450 ++(*P); 5451 } else { 5452 LValue &PosLVal = *Pos.get<LValue *>(); 5453 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); 5454 Idx = CGF.Builder.CreateNUWAdd(Idx, 5455 llvm::ConstantInt::get(Idx->getType(), 1)); 5456 CGF.EmitStoreOfScalar(Idx, PosLVal); 5457 } 5458 } 5459 } 5460 5461 static SmallVector<llvm::Value *, 4> 5462 emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, 5463 const OMPTaskDataTy::DependData &Data) { 5464 assert(Data.DepKind == OMPC_DEPEND_depobj && 5465 "Expected depobj dependecy kind."); 5466 SmallVector<llvm::Value *, 4> Sizes; 5467 SmallVector<LValue, 4> SizeLVals; 5468 ASTContext &C = CGF.getContext(); 5469 QualType FlagsTy; 5470 getDependTypes(C, KmpDependInfoTy, FlagsTy); 5471 RecordDecl *KmpDependInfoRD = 5472 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5473 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); 5474 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); 5475 { 5476 OMPIteratorGeneratorScope IteratorScope( 5477 CGF, 
        cast_or_null<OMPIteratorExpr>(
            Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                              : nullptr));
    for (const Expr *E : Data.DepExprs) {
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());
      // The number of records in a depobj array is stored in the base_addr
      // field of the kmp_depend_info element at index -1, i.e. just before
      // the first visible element (see emitDepobjDependClause, which writes
      // it there).
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
      // Accumulate into a zero-initialized temporary; inside an iterator
      // scope the load/add/store below is executed once per iteration, so
      // the temporary sums the sizes over all iterator values.
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.InitTempAlloca(NumLVal.getAddress(CGF),
                         llvm::ConstantInt::get(CGF.IntPtrTy, 0));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  // Read the accumulated totals back after the iterator scope is closed.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

/// Copies the kmp_depend_info records of the depobj dependencies in \p Data
/// into \p DependenciesArray, starting at the runtime position stored in
/// \p PosLVal, and advances that position by the number of copied records.
static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                               LValue PosLVal,
                               const OMPTaskDataTy::DependData &Data,
                               Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependecy kind.");
  ASTContext &C = CGF.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    // If the depobj list is generated by an iterator, emit the loop nest that
    // re-runs the body below for every iterator value.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      LValue Base = CGF.EmitLoadOfPointerLValue(
          DepobjLVal.getAddress(CGF),
          C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
      Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          Base.getAddress(CGF), KmpDependInfoPtrT);
      Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                                Base.getTBAAInfo());

      // Get number of elements in a single depobj.
      llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
          Addr.getPointer(),
          llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
      LValue NumDepsBase = CGF.MakeAddrLValue(
          Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy,
          Base.getBaseInfo(), Base.getTBAAInfo());
      // NumDeps = deps[i].base_addr;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      llvm::Value *NumDeps =
          CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr =
          Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos),
                  DependenciesArray.getAlignment());
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

/// Emits the dependency array for a 'depend' clause: returns the runtime
/// number of kmp_depend_info elements and a void* pointer to the array.
/// Returns {nullptr, invalid} when there are no dependencies at all.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count of regular (non-depobj, non-iterator) dependencies.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 1);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Depobj element counts are only known at run time (read from the
      // hidden size record of each depobj).
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // Element count for iterator dependencies is the product of all
      // iterator trip counts.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total element count is only known at run time: emit the dependency
    // array as a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    OpaqueValueExpr OVE(Loc,
                        C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
                        VK_RValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static case: a constant-sized array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Copy regular dependencies without iterators first, tracked with a
  // compile-time position.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}

/// Emits the heap-allocated dependency array for an 'omp depobj' construct
/// and returns a void* pointer to its first visible element (one past the
/// hidden size record).
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Iterator case: the element count is the runtime product of all
    // iterator trip counts; allocate (count + 1) records to leave room for
    // the hidden size record.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static case: NumDependencies + 1 records (the +1 is the size record).
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr = CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_alloc), Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the records starting at index 1 (index 0 is the size record);
  // iterator dependencies need a runtime counter, plain ones a constant.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer one element past the size record so callers only see
  // the visible dependency elements.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}

/// Frees the dependency array of a depobj via __kmpc_free. The allocation
/// actually starts one record before the stored pointer (the hidden size
/// record), so the address is stepped back before the call.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // Step back to the start of the allocation (the size record at index -1).
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_free), Args);
}

/// Rewrites the 'flags' field of every record in a depobj dependency array
/// to \p NewDepKind, implementing 'depobj(x) update(<kind>)'.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emits an '#pragma omp task' call: initializes the task object, emits the
/// dependency array (if any) and dispatches either the deferred
/// (__kmpc_omp_task[_with_deps]) or the undeferred (if0) code path depending
/// on the 'if' clause.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'then' path: the task is deferred; enqueue it via the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // 'else' path: the task is undeferred; wait for dependences and execute the
  // task entry inline between begin_if0/complete_if0 calls.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emits an '#pragma omp taskloop' call: initializes the task object, stores
/// the loop bounds/stride and reduction data into it, and invokes
/// __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lb/ub/st fields of the task object from the directive's
  // precomputed bound variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against a zero-length array: skip the body entirely if empty.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: temporarily remap LHSVar/RHSVar to the current element so the
  // combiner expression operates element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          // User-defined reduction: bind the opaque callee to the UDR's
          // combiner function, then emit the call expression.
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the internal 'void reduction_func(void *lhs[n], void *rhs[n])' that
/// the runtime calls to combine per-thread reduction data: each slot of the
/// arrays is a pointer to one reduction variable (VLA sizes occupy extra
/// slots), and each pair is combined with the corresponding reduction op.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
6171 CodeGenFunction CGF(CGM); 6172 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 6173 6174 // Dst = (void*[n])(LHSArg); 6175 // Src = (void*[n])(RHSArg); 6176 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6177 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 6178 ArgsType), CGF.getPointerAlign()); 6179 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6180 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 6181 ArgsType), CGF.getPointerAlign()); 6182 6183 // ... 6184 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); 6185 // ... 6186 CodeGenFunction::OMPPrivateScope Scope(CGF); 6187 auto IPriv = Privates.begin(); 6188 unsigned Idx = 0; 6189 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { 6190 const auto *RHSVar = 6191 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); 6192 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { 6193 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); 6194 }); 6195 const auto *LHSVar = 6196 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); 6197 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { 6198 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); 6199 }); 6200 QualType PrivTy = (*IPriv)->getType(); 6201 if (PrivTy->isVariablyModifiedType()) { 6202 // Get array size and emit VLA type. 
6203 ++Idx; 6204 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); 6205 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); 6206 const VariableArrayType *VLA = 6207 CGF.getContext().getAsVariableArrayType(PrivTy); 6208 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); 6209 CodeGenFunction::OpaqueValueMapping OpaqueMap( 6210 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); 6211 CGF.EmitVariablyModifiedType(PrivTy); 6212 } 6213 } 6214 Scope.Privatize(); 6215 IPriv = Privates.begin(); 6216 auto ILHS = LHSExprs.begin(); 6217 auto IRHS = RHSExprs.begin(); 6218 for (const Expr *E : ReductionOps) { 6219 if ((*IPriv)->getType()->isArrayType()) { 6220 // Emit reduction for array section. 6221 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); 6222 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); 6223 EmitOMPAggregateReduction( 6224 CGF, (*IPriv)->getType(), LHSVar, RHSVar, 6225 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 6226 emitReductionCombiner(CGF, E); 6227 }); 6228 } else { 6229 // Emit reduction for array subscript or single variable. 6230 emitReductionCombiner(CGF, E); 6231 } 6232 ++IPriv; 6233 ++ILHS; 6234 ++IRHS; 6235 } 6236 Scope.ForceCleanup(); 6237 CGF.FinishFunction(); 6238 return Fn; 6239 } 6240 6241 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, 6242 const Expr *ReductionOp, 6243 const Expr *PrivateRef, 6244 const DeclRefExpr *LHS, 6245 const DeclRefExpr *RHS) { 6246 if (PrivateRef->getType()->isArrayType()) { 6247 // Emit reduction for array section. 
6248 const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); 6249 const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); 6250 EmitOMPAggregateReduction( 6251 CGF, PrivateRef->getType(), LHSVar, RHSVar, 6252 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { 6253 emitReductionCombiner(CGF, ReductionOp); 6254 }); 6255 } else { 6256 // Emit reduction for array subscript or single variable. 6257 emitReductionCombiner(CGF, ReductionOp); 6258 } 6259 } 6260 6261 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, 6262 ArrayRef<const Expr *> Privates, 6263 ArrayRef<const Expr *> LHSExprs, 6264 ArrayRef<const Expr *> RHSExprs, 6265 ArrayRef<const Expr *> ReductionOps, 6266 ReductionOptionsTy Options) { 6267 if (!CGF.HaveInsertPoint()) 6268 return; 6269 6270 bool WithNowait = Options.WithNowait; 6271 bool SimpleReduction = Options.SimpleReduction; 6272 6273 // Next code should be emitted for reduction: 6274 // 6275 // static kmp_critical_name lock = { 0 }; 6276 // 6277 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { 6278 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); 6279 // ... 6280 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], 6281 // *(Type<n>-1*)rhs[<n>-1]); 6282 // } 6283 // 6284 // ... 6285 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; 6286 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), 6287 // RedList, reduce_func, &<lock>)) { 6288 // case 1: 6289 // ... 6290 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); 6291 // ... 6292 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); 6293 // break; 6294 // case 2: 6295 // ... 6296 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); 6297 // ... 6298 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] 6299 // break; 6300 // default:; 6301 // } 6302 // 6303 // if SimpleReduction is true, only the next code is generated: 6304 // ... 
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: combine privates into the LHS directly.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. The size is smuggled through the void* slot as an
      // inttoptr value; reduce_func reads it back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Pattern-match the reduction op as "x = <update>" so it can be lowered
      // to a simple atomic update if possible.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path: privatize the LHS with the loaded value and
                // re-evaluate the update expression against that temporary.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Locals/parameters are identified by their plain name; globals use the
  // mangled name so the result is unique across translation units.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
///   %0 = bitcast void* %arg to <type>*
///   store <type> <init>, <type>* %0
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue OrigLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
///   %lhs = bitcast void* %arg0 to <type>*
///   %rhs = bitcast void* %arg1 to <type>*
///   %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
///   store <type> %2, <type>* %lhs
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
///   %0 = bitcast void* %arg to <type>*
///   <destroy>(<type>* %0)
///   ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed (and nullptr is returned) if the item requires no
  // cleanups.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar;          // shared reduction item
  //   void *reduce_orig;          // original reduction item used for
  //                               // initialization
  //   size_t reduce_size;         // size of data item
  //   void *reduce_init;          // data initialization routine
  //   void *reduce_fini;          // data finalization routine
  //   void *reduce_comb;          // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t record per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null fini pointer tells the runtime no finalization is needed.
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // flags = 1 requests lazy (per-thread, on-demand) item creation.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  // The returned pointer is given the alignment of the shared item it mirrors.
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  // Prefer the OpenMPIRBuilder when it is enabled; otherwise emit the runtime
  // call directly.
  llvm::OpenMPIRBuilder *OMPBuilder = CGF.CGM.getOpenMPIRBuilder();
  if (OMPBuilder) {
    OMPBuilder->CreateTaskwait(CGF.Builder);
  } else {
    // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
    // global_tid);
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    // Ignore return result until untied tasks are supported.
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kinds as expected by the OpenMP runtime for the
/// kmp_int32 cncl_kind argument of __kmpc_cancel/__kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps an OpenMP cancellation region kind to the runtime's cancel kind.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    // The cancel call is guarded by the 'if' clause condition when present.
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
// The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // Emit the outlined body in a fresh CodeGenFunction so the target region's
  // captured-statement info does not leak into the enclosing function's state.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host side: a unique dummy global whose address serves as the region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Descend through nested compound statements, at each level trying to
  // identify the single "meaningful" child statement.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        // A declaration statement can be ignored if every declaration in it
        // is either a non-codegen declaration or a trivially-initialized
        // variable of trivial (or reference) type.
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive. Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  // Return conventions below: an i32 0 lets the runtime choose the number of
  // teams, an i32 1 forces a single team, and nullptr means the nested
  // construct could not be analyzed here.
  switch (DirectiveKind) {
  case OMPD_target: {
    // A plain 'target' may have a teams construct nested inside; look through
    // the captured statement for the single meaningful child.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        return Bld.getInt32(0);
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams directive: the num_teams clause, if any, is on
    // this directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct is possible here: exactly one team.
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Compute the number of threads for the parallel region nested (if any)
/// inside the captured statement \p CS, clamped by \p DefaultThreadLimitVal
/// when that is non-null.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        // Only an 'if' clause with no modifier or the 'parallel' modifier
        // applies to the nested parallel region.
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically false condition: the region runs with one thread.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause captured.
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal =
                CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        // Emit any pre-init declarations captured by the num_threads clause.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // Clamp num_threads by the inherited thread limit, if any.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive. Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  // Convention: an i32 0 lets the runtime pick the number of threads, an
  // i32 1 forces a single thread.
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        // Emit any pre-init declarations captured by the thread_limit clause.
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a nested teams (non-distribute) construct, descend one more level
      // to find the directive that actually determines the thread count.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // The effective limit is min(num_threads, thread_limit) when both are
      // present.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading. These must stay in sync with the flag values expected by the
  /// offloading runtime library.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flags signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  /// Computed by counting the trailing zero bits of the mask (48 for the
  /// current value).
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      Offset++;
    return Offset;
  }

  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };

  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;

  /// Map between a struct and the its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either a executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Return an llvm::Value holding the number of bytes to map for \p E,
  /// accounting for array shaping expressions and array sections.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      // Size = element size * product of all dimension extents.
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLoc().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLoc().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLoc().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      // Guard against a lower bound past the end of the base: clamp to zero
      // instead of producing a wrapped-around (huge) unsigned size.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
        != MapModifiers.end())
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
        != MapModifiers.end())
      Bits |= OMP_MAP_CLOSE;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section, is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refer to a single element.
    if (OASE->getColonLoc().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happen to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more that size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes and map type
  /// bits for the provided map type, map modifier, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = llvm::None) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression. The
    // component list is stored outermost-expression-first, hence the reverse
    // iterators.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      // Array subscript / array section whose base is 'this'.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      // Array-shaping expression whose base is 'this'.
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          // Declare target 'link' variables (and 'to' variables under unified
          // shared memory) are reached through a runtime-provided reference.
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());

        // We do not need to generate individual map information for the
        // pointer, it can be associated with the combined storage.
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME)
          ShouldBeMemberOf = true;
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      bool IsFinalArraySection =
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      // A pointer reached through a unary/binary operator is a dereference,
      // not a plain pointer component.
      bool IsNonDerefPointer = IsPointer && !UO && !BO;

      if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        // LB is the section (lowest) address of the current component.
        Address LB = Address::invalid();
        if (OAShE) {
          LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
                       CGF.getContext().getTypeAlignInChars(
                           OAShE->getBase()->getType()));
        } else {
          LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                   .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointer =
            IsPointer && EncounteredME &&
            (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
             EncounteredME);
        if (!OverlappedElements.empty()) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(Next == CE &&
                 "Expected last element for the overlapped elements.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LB};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          // HB points at the last byte of the element, so the whole object
          // range [LB, HB] is covered.
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB,
                                                              CGF.VoidPtrTy),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false);
          LB = BP;
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (MC.getAssociatedDeclaration()) {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress(CGF);
                // Size of the gap between the previous cursor (LB) and the
                // start of this overlapped element.
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            BasePointers.push_back(BP.getPointer());
            Pointers.push_back(LB.getPointer());
            Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
                                                      /*isSigned=*/true));
            Types.push_back(Flags);
            // Continue scanning right after the overlapped element.
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          // Emit the trailing chunk from the last overlapped element to the
          // end of the object.
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.EmitCastToVoidPtr(
                  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          Types.push_back(Flags);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointer) {
          BasePointers.push_back(BP.getPointer());
          Pointers.push_back(LB.getPointer());
          Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference,
              IsCaptureFirstInfo && !RequiresReference);

          if (!IsExpressionFirstInfo) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer)
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LB};
            PartialStruct.HighestElem = {FieldIndex, LB};
            PartialStruct.Base = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LB};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LB};
          }
        }

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
      }
    }
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      // Constant variables captured by reference are mapped 'to' with 'always'
      // so the device copy is kept up to date.
      if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
          Cap.getCaptureKind() == CapturedStmt::VCK_ByRef)
        return MappableExprsHandler::OMP_MAP_ALWAYS |
               MappableExprsHandler::OMP_MAP_TO;
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    // Default for non-firstprivate captures: map both directions.
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }

  /// Compute the MEMBER_OF flag that refers to the entry at the given
  /// (zero-based) \p Position in the argument arrays.
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    // MEMBER_OF is 1-based, hence the +1.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }

  /// Replace the MEMBER_OF placeholder in \p Flags with \p MemberOfFlag,
  /// leaving PTR_AND_OBJ entries without the placeholder untouched.
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of the
    // proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }

  /// Append to \p Layout, in LLVM layout order, the non-bitfield, non-zero-size
  /// fields of \p RD, recursing into its non-empty (virtual) base classes.
  /// \p AsBase selects the base-subobject LLVM type instead of the complete
  /// object type.
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    // RecordLayout maps each LLVM field slot to the base class or field that
    // occupies it (or null if neither).
    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      // Do not overwrite a slot that was already claimed by a non-virtual
      // base.
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    // Walk the slots in layout order: recurse into base subobjects and append
    // plain fields.
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }

public:
  /// Constructor for executable directives: pre-scan the directive's
  /// firstprivate and is_device_ptr clauses so later queries are cheap.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[L.first].push_back(L.second);
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
                         MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                         MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct) const {
    // Base is the base of the struct
    BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
    Pointers.push_back(LB);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
    llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    Sizes.push_back(Size);
    // Map type is always TARGET_PARAM
    Types.push_back(OMP_MAP_TARGET_PARAM);
    // Remove TARGET_PARAM flag from the first element; the combined entry just
    // emitted carries it instead.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes and map
  /// types for the extracted mappable expressions. Also, for each item that
  /// relates with a device pointer, a pair of the relevant declaration and
  /// index where it occurs is appended to the device pointers info array.
  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                       MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                       MapFlagsArrayTy &Types) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen = [&Info](
        const ValueDecl *D,
        OMPClauseMappableExprCommon::MappableExprComponentListRef L,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit) {
      // Component lists rooted at 'this' are keyed by a null declaration.
      const ValueDecl *VD =
          D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
      Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                            IsImplicit);
    };

    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    // Motion clauses ('to'/'from' on target update) behave like the matching
    // map types and carry no modifiers.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }
    for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
      for (const auto L : C->component_lists()) {
        InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
      }

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user fault if that was not mapped before. If there is
    // no map information and the pointer is a struct member, then we defer the
    // emission of that entry until the whole struct has been processed.
    llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;

    for (const auto *C :
         CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
      for (const auto L : C->component_lists()) {
        assert(!L.second.empty() && "Not expecting empty list of components!");
        const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = L.second.back().getAssociatedExpression();
        // If the first component is a member expression, we have to look into
        // 'this', which maps to null in the map of map information. Otherwise
        // look directly for the information.
        auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

        // We potentially have map information for this declaration already.
        // Look for the first set of components that refer to it.
        if (It != Info.end()) {
          auto CI = std::find_if(
              It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
                return MI.Components.back().getAssociatedDeclaration() == VD;
              });
          // If we found a map entry, signal that the pointer has to be returned
          // and move on to the next declaration.
          if (CI != It->second.end()) {
            CI->ReturnDevicePointer = true;
            continue;
          }
        }

        // We didn't find any match in our map information - generate a zero
        // size array section - if the pointer is a struct member we defer this
        // action until the whole struct has been processed.
        if (isa<MemberExpr>(IE)) {
          // Insert the pointer into Info to be processed by
          // generateInfoForComponentList. Because it is a member pointer
          // without a pointee, no entry will be generated for it, therefore
          // we need to generate one after the whole struct has been processed.
          // Nonetheless, generateInfoForComponentList must be called to take
          // the pointer into account for the calculation of the range of the
          // partial struct.
          InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                  /*ReturnDevicePointer=*/false, C->isImplicit());
          DeferredInfo[nullptr].emplace_back(IE, VD);
        } else {
          // Non-member pointer with no map information: emit a zero-size
          // entry that only requests the device address back.
          llvm::Value *Ptr =
              CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
          BasePointers.emplace_back(Ptr, VD);
          Pointers.push_back(Ptr);
          Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
          Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
        }
      }
    }

    for (const auto &M : Info) {
      // We need to know when we generate information for the first component
      // associated with a capture, because the mapping flags depend on it.
      bool IsFirstComponentList = true;

      // Temporary versions of arrays
      MapBaseValuesArrayTy CurBasePointers;
      MapValuesArrayTy CurPointers;
      MapValuesArrayTy CurSizes;
      MapFlagsArrayTy CurTypes;
      StructRangeInfoTy PartialStruct;

      for (const MapInfo &L : M.second) {
        assert(!L.Components.empty() &&
               "Not expecting declaration with no component lists.");

        // Remember the current base pointer index.
        unsigned CurrentBasePointersIdx = CurBasePointers.size();
        generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                     CurBasePointers, CurPointers, CurSizes,
                                     CurTypes, PartialStruct,
                                     IsFirstComponentList, L.IsImplicit);

        // If this entry relates with a device pointer, set the relevant
        // declaration and add the 'return pointer' flag.
        if (L.ReturnDevicePointer) {
          assert(CurBasePointers.size() > CurrentBasePointersIdx &&
                 "Unexpected number of mapped base pointers.");

          const ValueDecl *RelevantVD =
              L.Components.back().getAssociatedDeclaration();
          assert(RelevantVD &&
                 "No relevant declaration related with device pointer??");

          CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
          CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
        }
        IsFirstComponentList = false;
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr.
      auto CI = DeferredInfo.find(M.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
          llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
              this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
          CurBasePointers.emplace_back(BasePtr, L.VD);
          CurPointers.push_back(Ptr);
          CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
          // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
          // value MEMBER_OF=FFFF so that the entry is later updated with the
          // correct value of MEMBER_OF.
          CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                             OMP_MAP_MEMBER_OF);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                          PartialStruct);

      // We need to append the results of this capture to what we already have.
8757 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8758 Pointers.append(CurPointers.begin(), CurPointers.end()); 8759 Sizes.append(CurSizes.begin(), CurSizes.end()); 8760 Types.append(CurTypes.begin(), CurTypes.end()); 8761 } 8762 } 8763 8764 /// Generate all the base pointers, section pointers, sizes and map types for 8765 /// the extracted map clauses of user-defined mapper. 8766 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8767 MapValuesArrayTy &Pointers, 8768 MapValuesArrayTy &Sizes, 8769 MapFlagsArrayTy &Types) const { 8770 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8771 "Expect a declare mapper directive"); 8772 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8773 // We have to process the component lists that relate with the same 8774 // declaration in a single chunk so that we can generate the map flags 8775 // correctly. Therefore, we organize all lists in a map. 8776 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8777 8778 // Helper function to fill the information map for the different supported 8779 // clauses. 8780 auto &&InfoGen = [&Info]( 8781 const ValueDecl *D, 8782 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8783 OpenMPMapClauseKind MapType, 8784 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8785 bool ReturnDevicePointer, bool IsImplicit) { 8786 const ValueDecl *VD = 8787 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8788 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8789 IsImplicit); 8790 }; 8791 8792 for (const auto *C : CurMapperDir->clauselists()) { 8793 const auto *MC = cast<OMPMapClause>(C); 8794 for (const auto L : MC->component_lists()) { 8795 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8796 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8797 } 8798 } 8799 8800 for (const auto &M : Info) { 8801 // We need to know when we generate information for the first component 8802 // associated with a capture, because the mapping flags depend on it. 8803 bool IsFirstComponentList = true; 8804 8805 // Temporary versions of arrays 8806 MapBaseValuesArrayTy CurBasePointers; 8807 MapValuesArrayTy CurPointers; 8808 MapValuesArrayTy CurSizes; 8809 MapFlagsArrayTy CurTypes; 8810 StructRangeInfoTy PartialStruct; 8811 8812 for (const MapInfo &L : M.second) { 8813 assert(!L.Components.empty() && 8814 "Not expecting declaration with no component lists."); 8815 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8816 CurBasePointers, CurPointers, CurSizes, 8817 CurTypes, PartialStruct, 8818 IsFirstComponentList, L.IsImplicit); 8819 IsFirstComponentList = false; 8820 } 8821 8822 // If there is an entry in PartialStruct it means we have a struct with 8823 // individual members mapped. Emit an extra combined entry. 8824 if (PartialStruct.Base.isValid()) 8825 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8826 PartialStruct); 8827 8828 // We need to append the results of this capture to what we already have. 8829 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8830 Pointers.append(CurPointers.begin(), CurPointers.end()); 8831 Sizes.append(CurSizes.begin(), CurSizes.end()); 8832 Types.append(CurTypes.begin(), CurTypes.end()); 8833 } 8834 } 8835 8836 /// Emit capture info for lambdas for variables captured by reference. 
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
      MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
      MapFlagsArrayTy &Types,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    // Only lambda closure objects get this treatment; bail out silently for
    // any other record or non-record type.
    const auto *RD = VD->getType()
                         .getCanonicalType()
                         .getNonReferenceType()
                         ->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(
        VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
    llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Map the captured 'this' pointer: base is the closure field, pointee is
      // the enclosing object. LambdaPointers remembers which closure each
      // entry belongs to for the later MEMBER_OF fixup.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      BasePointers.push_back(ThisLVal.getPointer(CGF));
      Pointers.push_back(ThisLValVal.getPointer(CGF));
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = LC.getCapturedVar();
      // Only by-reference captures and captured pointers need extra mapping.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarLValVal.getPointer(CGF));
        Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: map the pointer value itself with size 0.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        BasePointers.push_back(VarLVal.getPointer(CGF));
        Pointers.push_back(VarRVal.getScalarVal());
        Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                      OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
    }
  }

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures. The exact
      // flag combination below is the signature generateInfoForLambdaCaptures
      // stamped on each lambda-capture entry.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      // Scan backwards for the entry whose pointer is the closure itself;
      // that entry's index becomes this capture's MEMBER_OF parent.
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated to a given capture.
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg,
                              MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // The declaration this capture refers to (null for 'this' captures).
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (DevPointersMap.count(VD)) {
      BasePointers.emplace_back(Arg, VD);
      Pointers.push_back(Arg);
      Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
      return;
    }

    // Collect all component lists from all map clauses that refer to VD.
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
    SmallVector<MapData, 4> DeclComponentLists;
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->decl_component_lists(VD)) {
        assert(L.first == VD &&
               "We got information for the wrong declaration??");
        assert(!L.second.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit());
      }
    }

    // Find overlapping elements (including the offset from the base element).
    // Component lists are compared back-to-front; if one list's components are
    // a suffix-prefix of another's, the shorter one is the base and the longer
    // one is recorded as overlapping it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head of
        // the components list.
        if (CI == CE || SI == SE) {
          assert((CI != CE || SI != SE) &&
                 "Unexpected full match of the mapping components.");
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item. The record layout (base
    // classes flattened for C++ records) provides the field order used to
    // break ties between fields of different parents.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      if (const auto *CRD =
              VD->getType().getCanonicalType()->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Emit the entries that have overlapped elements first, passing the sorted
    // overlap lists so generateInfoForComponentList can split around them.
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      bool IsFirstComponentList = true;
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit, OverlappedComponents);
    }
    // Go through other elements without overlapped elements.
    bool IsFirstComponentList = OverlappedData.empty();
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, Components,
                                     BasePointers, Pointers, Sizes, Types,
                                     PartialStruct, IsFirstComponentList,
                                     IsImplicit);
      IsFirstComponentList = false;
    }
  }

  /// Generate the base pointers, section pointers, sizes and map types
  /// associated with the declare target link variables.
  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
                                        MapValuesArrayTy &Pointers,
                                        MapValuesArrayTy &Sizes,
                                        MapFlagsArrayTy &Types) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect a executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables. Under unified shared memory
    // no extra mapping is needed, so such entries are skipped.
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      for (const auto L : C->component_lists()) {
        if (!L.first)
          continue;
        const auto *VD = dyn_cast<VarDecl>(L.first);
        if (!VD)
          continue;
        llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
            OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
          continue;
        StructRangeInfoTy PartialStruct;
        generateInfoForComponentList(
            C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
            Pointers, Sizes, Types, PartialStruct,
            /*IsFirstComponentList=*/true, C->isImplicit());
        assert(!PartialStruct.Base.isValid() &&
               "No partial structs for declare target link expected.");
      }
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapBaseValuesArrayTy &CurBasePointers,
                              MapValuesArrayTy &CurPointers,
                              MapValuesArrayTy &CurSizes,
                              MapFlagsArrayTy &CurMapTypes) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // 'this': map the pointed-to object tofrom.
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CurSizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      CurBasePointers.push_back(CV);
      CurPointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CurMapTypes.push_back(OMP_MAP_LITERAL);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CurMapTypes.push_back(OMP_MAP_NONE);
        CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CurSizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end() &&
          VD->getType().isConstant(CGF.getContext())) {
        // Constant firstprivates are promoted to a registered global copy so
        // the same storage can be shared by all target regions.
        llvm::Constant *Addr =
            CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
        // Copy the value of the original variable to the new global copy.
        CGF.Builder.CreateMemCpy(
            CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
            Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
            CurSizes.back(), /*IsVolatile=*/false);
        // Use new global variable as the base pointers.
        CurBasePointers.push_back(Addr);
        CurPointers.push_back(Addr);
      } else {
        CurBasePointers.push_back(CV);
        if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
          Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
              CV, ElementType, CGF.getContext().getDeclAlign(VD),
              AlignmentSource::Decl));
          CurPointers.push_back(PtrAddr.getPointer());
        } else {
          CurPointers.push_back(CV);
        }
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CurMapTypes.back() |= OMP_MAP_IMPLICIT;
  }
};
} // anonymous namespace

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void
emitOffloadingArrays(CodeGenFunction &CGF,
                     MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
                     MappableExprsHandler::MapValuesArrayTy &Pointers,
                     MappableExprsHandler::MapValuesArrayTy &Sizes,
                     MappableExprsHandler::MapFlagsArrayTy &MapTypes,
                     CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.
    bool hasRuntimeEvaluationCaptureSize = false;
    for (llvm::Value *S : Sizes)
      if (!isa<llvm::Constant>(S)) {
        hasRuntimeEvaluationCaptureSize = true;
        break;
      }

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // Base pointers and pointers always live in function-local stack arrays.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    if (hasRuntimeEvaluationCaptureSize) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // We expect all the sizes to be constant, so we collect them to create
      // a constant array.
      SmallVector<llvm::Constant *, 16> ConstSizes;
      for (llvm::Value *S : Sizes)
        ConstSizes.push_back(cast<llvm::Constant>(S));

      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(),
          /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
          SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      Info.SizesArray = SizesArrayGbl;
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
    llvm::copy(MapTypes, Mapping.begin());
    llvm::Constant *MapTypesArrayInit =
        llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl = new llvm::GlobalVariable(
        CGM.getModule(), MapTypesArrayInit->getType(),
        /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
        MapTypesArrayInit, MaptypesName);
    MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
    Info.MapTypesArray = MapTypesArrayGbl;

    // Fill the base-pointer / pointer (and, if needed, size) arrays slot by
    // slot with the collected values.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where use_device_ptr declarations can later read the device
      // address back from.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (hasRuntimeEvaluationCaptureSize) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(
            CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
            SAddr);
      }
    }
  }
}

/// Emit the arguments to be passed to the runtime library based on the
/// arrays of pointers, sizes and map types.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    // Decay each array to a pointer to its first element.
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
  } else {
    // No captures: the runtime accepts null argument arrays.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
  }
}

/// Check for inner distribute directive.
9381 static const OMPExecutableDirective * 9382 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 9383 const auto *CS = D.getInnermostCapturedStmt(); 9384 const auto *Body = 9385 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 9386 const Stmt *ChildStmt = 9387 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9388 9389 if (const auto *NestedDir = 9390 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9391 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 9392 switch (D.getDirectiveKind()) { 9393 case OMPD_target: 9394 if (isOpenMPDistributeDirective(DKind)) 9395 return NestedDir; 9396 if (DKind == OMPD_teams) { 9397 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 9398 /*IgnoreCaptured=*/true); 9399 if (!Body) 9400 return nullptr; 9401 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 9402 if (const auto *NND = 9403 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 9404 DKind = NND->getDirectiveKind(); 9405 if (isOpenMPDistributeDirective(DKind)) 9406 return NND; 9407 } 9408 } 9409 return nullptr; 9410 case OMPD_target_teams: 9411 if (isOpenMPDistributeDirective(DKind)) 9412 return NestedDir; 9413 return nullptr; 9414 case OMPD_target_parallel: 9415 case OMPD_target_simd: 9416 case OMPD_target_parallel_for: 9417 case OMPD_target_parallel_for_simd: 9418 return nullptr; 9419 case OMPD_target_teams_distribute: 9420 case OMPD_target_teams_distribute_simd: 9421 case OMPD_target_teams_distribute_parallel_for: 9422 case OMPD_target_teams_distribute_parallel_for_simd: 9423 case OMPD_parallel: 9424 case OMPD_for: 9425 case OMPD_parallel_for: 9426 case OMPD_parallel_master: 9427 case OMPD_parallel_sections: 9428 case OMPD_for_simd: 9429 case OMPD_parallel_for_simd: 9430 case OMPD_cancel: 9431 case OMPD_cancellation_point: 9432 case OMPD_ordered: 9433 case OMPD_threadprivate: 9434 case OMPD_allocate: 9435 case OMPD_task: 9436 case OMPD_simd: 9437 case OMPD_sections: 9438 
case OMPD_section: 9439 case OMPD_single: 9440 case OMPD_master: 9441 case OMPD_critical: 9442 case OMPD_taskyield: 9443 case OMPD_barrier: 9444 case OMPD_taskwait: 9445 case OMPD_taskgroup: 9446 case OMPD_atomic: 9447 case OMPD_flush: 9448 case OMPD_depobj: 9449 case OMPD_scan: 9450 case OMPD_teams: 9451 case OMPD_target_data: 9452 case OMPD_target_exit_data: 9453 case OMPD_target_enter_data: 9454 case OMPD_distribute: 9455 case OMPD_distribute_simd: 9456 case OMPD_distribute_parallel_for: 9457 case OMPD_distribute_parallel_for_simd: 9458 case OMPD_teams_distribute: 9459 case OMPD_teams_distribute_simd: 9460 case OMPD_teams_distribute_parallel_for: 9461 case OMPD_teams_distribute_parallel_for_simd: 9462 case OMPD_target_update: 9463 case OMPD_declare_simd: 9464 case OMPD_declare_variant: 9465 case OMPD_begin_declare_variant: 9466 case OMPD_end_declare_variant: 9467 case OMPD_declare_target: 9468 case OMPD_end_declare_target: 9469 case OMPD_declare_reduction: 9470 case OMPD_declare_mapper: 9471 case OMPD_taskloop: 9472 case OMPD_taskloop_simd: 9473 case OMPD_master_taskloop: 9474 case OMPD_master_taskloop_simd: 9475 case OMPD_parallel_master_taskloop: 9476 case OMPD_parallel_master_taskloop_simd: 9477 case OMPD_requires: 9478 case OMPD_unknown: 9479 llvm_unreachable("Unexpected directive."); 9480 } 9481 } 9482 9483 return nullptr; 9484 } 9485 9486 /// Emit the user-defined mapper function. The code generation follows the 9487 /// pattern in the example below. 9488 /// \code 9489 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 9490 /// void *base, void *begin, 9491 /// int64_t size, int64_t type) { 9492 /// // Allocate space for an array section first. 9493 /// if (size > 1 && !maptype.IsDelete) 9494 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 9495 /// size*sizeof(Ty), clearToFrom(type)); 9496 /// // Map members. 
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFrom(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function is generated at most once per declaration.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);

  // Prepare mapper function arguments and attributes. The signature is
  // (void *rt_mapper_handle, void *base, void *begin, int64_t size,
  //  int64_t type) as shown in the \code example above.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // The mapper name encodes the mangled mapped type so distinct types get
  // distinct internal functions: .omp_mapper.<type_name>.<mapper_id>.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
      CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initialization and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initialization if this is an array section and \p MapType
  // indicates that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied (skip the loop
  // entirely for an empty section).
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block. PtrPHI tracks the current element; its
  // back-edge incoming value is added after the body is emitted.
  MapperCGF.EmitBlock(BodyBB);
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent =
      Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
                          .getAlignment()
                          .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() {
    return MapperCGF
        .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>())
        .getAddress(MapperCGF);
  });
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
  MappableExprsHandler::MapValuesArrayTy Pointers;
  MappableExprsHandler::MapValuesArrayTy Sizes;
  MappableExprsHandler::MapFlagsArrayTy MapTypes;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs);
  // Pre-shift the count into the MEMBER_OF bit-field position so it can be
  // added directly to a component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Sizes[I];

    // Extract the MEMBER_OF field from the map type.
    llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
    MapperCGF.EmitBlock(MemberBB);
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
    llvm::Value *Member = MapperCGF.Builder.CreateAnd(
        OriMapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
    llvm::BasicBlock *MemberCombineBB =
        MapperCGF.createBasicBlock("omp.member.combine");
    llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type");
    llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member);
    MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB);
    // Add the number of pre-existing components to the MEMBER_OF field if it
    // is valid.
    MapperCGF.EmitBlock(MemberCombineBB);
    llvm::Value *CombinedMember =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
    // Do nothing if it is not a member of previous components.
    MapperCGF.EmitBlock(TypeBB);
    llvm::PHINode *MemberMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype");
    MemberMapType->addIncoming(OriMapType, MemberBB);
    MemberMapType->addIncoming(CombinedMember, MemberCombineBB);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing (the edge from ToElseBB reaches EndBB
    // directly with MemberMapType unchanged).
    MapperCGF.EmitBlock(EndBB);
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    // TODO: call the corresponding mapper function if a user-defined mapper is
    // associated with this map clause.
    // Call the runtime API __tgt_push_mapper_component to fill up the runtime
    // data structure.
    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType};
    MapperCGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__tgt_push_mapper_component),
        OffloadingArgs);
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, BodyBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the mapper function; when emitted inside another function, also
  // record the association so the declaration can be found per function.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) {
  // Prefix distinguishes the generated block names for the two modes.
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
llvm::BasicBlock *IsDeleteBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"}));
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  // Only sections with more than one element are treated as arrays here.
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB);

  // Evaluate if we are going to delete this section.
  MapperCGF.EmitBlock(IsDeleteBB);
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  if (IsInit) {
    // Initialization runs only when the delete bit is NOT set ...
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    // ... whereas deletion runs only when it IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
  MapperCGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs);
}

/// Emit a call to __kmpc_push_target_tripcount(DeviceID, NumIterations) ahead
/// of a target region whose (possibly nested) distribute loop has a trip
/// count computable by \p SizeEmitter. Does nothing when no such loop
/// directive is found or when \p SizeEmitter yields no value.
void CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Value *DeviceID,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return;
  const auto *LD = cast<OMPLoopDirective>(TD);
  auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
    if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
      llvm::Value *Args[] = {DeviceID, NumIterations};
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
    }
  };
  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
}

/// Emit the offloading launch sequence for a target region: fill the
/// offloading argument arrays, call __tgt_target()/__tgt_target_teams() (or
/// their nowait variants), and fall back to the host-outlined function when
/// offloading fails or is unavailable.
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(OutlinedFn && "Invalid outlined function!");

  // A depend clause forces the region to be wrapped in an outer task; the
  // captured variables must then be re-generated inside each code-gen lambda.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo/MapTypesArray are filled by TargetThenGen below and read (by
  // reference) from ThenGen, which may run later inside a task.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region. This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads. This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Build the offloading argument arrays for every capture, then dispatch to
  // ThenGen (directly, or via a task when a depend clause is present).
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Iterate captures, record fields, and captured values in lockstep.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    // Publish the results for ThenGen via the captured-by-reference locals.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host fallback dispatcher, mirroring TargetThenGen's task handling.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

/// Recursively scan \p S for target executable directives and emit the
/// corresponding device functions for each registered target region.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    // A target region is identified by (device id, file id, parent name,
    // line), derived from the directive's begin location.
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
                                                            ParentName, Line))
      return;

    // Dispatch to the device-function emitter matching the directive kind.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_unknown:
      // RequiresDeviceCodegen guarantees a target execution directive here.
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // A non-target directive may still contain target regions: recurse into its
  // innermost captured statement.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(
        E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) {
      Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
          OMPDeclareTargetDeclAttr::getDeviceType(FD);
      // Do not emit device_type(nohost) functions for the host.
      if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
        return true;
    }
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
10299 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 10300 StringRef Name = CGM.getMangledName(GD); 10301 scanForTargetRegionsFunctions(FD->getBody(), Name); 10302 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 10303 OMPDeclareTargetDeclAttr::getDeviceType(FD); 10304 // Do not emit device_type(nohost) functions for the host. 10305 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 10306 return true; 10307 } 10308 10309 // Do not to emit function if it is not marked as declare target. 10310 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 10311 AlreadyEmittedTargetDecls.count(VD) == 0; 10312 } 10313 10314 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 10315 if (!CGM.getLangOpts().OpenMPIsDevice) 10316 return false; 10317 10318 // Check if there are Ctors/Dtors in this declaration and look for target 10319 // regions in it. We use the complete variant to produce the kernel name 10320 // mangling. 10321 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 10322 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 10323 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 10324 StringRef ParentName = 10325 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 10326 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 10327 } 10328 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 10329 StringRef ParentName = 10330 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 10331 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 10332 } 10333 } 10334 10335 // Do not to emit variable if it is not marked as declare target. 
10336 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10337 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 10338 cast<VarDecl>(GD.getDecl())); 10339 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 10340 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10341 HasRequiresUnifiedSharedMemory)) { 10342 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 10343 return true; 10344 } 10345 return false; 10346 } 10347 10348 llvm::Constant * 10349 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 10350 const VarDecl *VD) { 10351 assert(VD->getType().isConstant(CGM.getContext()) && 10352 "Expected constant variable."); 10353 StringRef VarName; 10354 llvm::Constant *Addr; 10355 llvm::GlobalValue::LinkageTypes Linkage; 10356 QualType Ty = VD->getType(); 10357 SmallString<128> Buffer; 10358 { 10359 unsigned DeviceID; 10360 unsigned FileID; 10361 unsigned Line; 10362 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 10363 FileID, Line); 10364 llvm::raw_svector_ostream OS(Buffer); 10365 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 10366 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 10367 VarName = OS.str(); 10368 } 10369 Linkage = llvm::GlobalValue::InternalLinkage; 10370 Addr = 10371 getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 10372 getDefaultFirstprivateAddressSpace()); 10373 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 10374 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 10375 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 10376 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10377 VarName, Addr, VarSize, 10378 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 10379 return Addr; 10380 } 10381 10382 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 10383 llvm::Constant *Addr) { 10384 if (CGM.getLangOpts().OMPTargetTriples.empty() && 10385 
!CGM.getLangOpts().OpenMPIsDevice) 10386 return; 10387 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10388 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10389 if (!Res) { 10390 if (CGM.getLangOpts().OpenMPIsDevice) { 10391 // Register non-target variables being emitted in device code (debug info 10392 // may cause this). 10393 StringRef VarName = CGM.getMangledName(VD); 10394 EmittedNonTargetVariables.try_emplace(VarName, Addr); 10395 } 10396 return; 10397 } 10398 // Register declare target variables. 10399 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 10400 StringRef VarName; 10401 CharUnits VarSize; 10402 llvm::GlobalValue::LinkageTypes Linkage; 10403 10404 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10405 !HasRequiresUnifiedSharedMemory) { 10406 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10407 VarName = CGM.getMangledName(VD); 10408 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 10409 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 10410 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 10411 } else { 10412 VarSize = CharUnits::Zero(); 10413 } 10414 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 10415 // Temp solution to prevent optimizations of the internal variables. 
10416 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 10417 std::string RefName = getName({VarName, "ref"}); 10418 if (!CGM.GetGlobalValue(RefName)) { 10419 llvm::Constant *AddrRef = 10420 getOrCreateInternalVariable(Addr->getType(), RefName); 10421 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 10422 GVAddrRef->setConstant(/*Val=*/true); 10423 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 10424 GVAddrRef->setInitializer(Addr); 10425 CGM.addCompilerUsedGlobal(GVAddrRef); 10426 } 10427 } 10428 } else { 10429 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 10430 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10431 HasRequiresUnifiedSharedMemory)) && 10432 "Declare target attribute must link or to with unified memory."); 10433 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 10434 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 10435 else 10436 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 10437 10438 if (CGM.getLangOpts().OpenMPIsDevice) { 10439 VarName = Addr->getName(); 10440 Addr = nullptr; 10441 } else { 10442 VarName = getAddrOfDeclareTargetVar(VD).getName(); 10443 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 10444 } 10445 VarSize = CGM.getPointerSize(); 10446 Linkage = llvm::GlobalValue::WeakAnyLinkage; 10447 } 10448 10449 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 10450 VarName, Addr, VarSize, Flags, Linkage); 10451 } 10452 10453 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 10454 if (isa<FunctionDecl>(GD.getDecl()) || 10455 isa<OMPDeclareReductionDecl>(GD.getDecl())) 10456 return emitTargetFunctions(GD); 10457 10458 return emitTargetGlobalVariable(GD); 10459 } 10460 10461 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 10462 for (const VarDecl *VD : DeferredGlobalVariables) { 10463 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 10464 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 10465 if (!Res) 
10466 continue; 10467 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 10468 !HasRequiresUnifiedSharedMemory) { 10469 CGM.EmitGlobal(VD); 10470 } else { 10471 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 10472 (*Res == OMPDeclareTargetDeclAttr::MT_To && 10473 HasRequiresUnifiedSharedMemory)) && 10474 "Expected link clause or to clause with unified memory."); 10475 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 10476 } 10477 } 10478 } 10479 10480 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 10481 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 10482 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 10483 " Expected target-based directive."); 10484 } 10485 10486 void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { 10487 for (const OMPClause *Clause : D->clauselists()) { 10488 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 10489 HasRequiresUnifiedSharedMemory = true; 10490 } else if (const auto *AC = 10491 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { 10492 switch (AC->getAtomicDefaultMemOrderKind()) { 10493 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel: 10494 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease; 10495 break; 10496 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst: 10497 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent; 10498 break; 10499 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed: 10500 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic; 10501 break; 10502 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown: 10503 break; 10504 } 10505 } 10506 } 10507 } 10508 10509 llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const { 10510 return RequiresAtomicOrdering; 10511 } 10512 10513 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 10514 LangAS &AS) { 10515 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 10516 return false; 10517 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 10518 
switch(A->getAllocatorType()) { 10519 case OMPAllocateDeclAttr::OMPNullMemAlloc: 10520 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 10521 // Not supported, fallback to the default mem space. 10522 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 10523 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 10524 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 10525 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 10526 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 10527 case OMPAllocateDeclAttr::OMPConstMemAlloc: 10528 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 10529 AS = LangAS::Default; 10530 return true; 10531 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 10532 llvm_unreachable("Expected predefined allocator for the variables with the " 10533 "static storage."); 10534 } 10535 return false; 10536 } 10537 10538 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 10539 return HasRequiresUnifiedSharedMemory; 10540 } 10541 10542 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 10543 CodeGenModule &CGM) 10544 : CGM(CGM) { 10545 if (CGM.getLangOpts().OpenMPIsDevice) { 10546 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 10547 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 10548 } 10549 } 10550 10551 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 10552 if (CGM.getLangOpts().OpenMPIsDevice) 10553 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 10554 } 10555 10556 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 10557 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 10558 return true; 10559 10560 const auto *D = cast<FunctionDecl>(GD.getDecl()); 10561 // Do not to emit function if it is marked as declare target as it was already 10562 // emitted. 
10563 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 10564 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) { 10565 if (auto *F = dyn_cast_or_null<llvm::Function>( 10566 CGM.GetGlobalValue(CGM.getMangledName(GD)))) 10567 return !F->isDeclaration(); 10568 return false; 10569 } 10570 return true; 10571 } 10572 10573 return !AlreadyEmittedTargetDecls.insert(D).second; 10574 } 10575 10576 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 10577 // If we don't have entries or if we are emitting code for the device, we 10578 // don't need to do anything. 10579 if (CGM.getLangOpts().OMPTargetTriples.empty() || 10580 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 10581 (OffloadEntriesInfoManager.empty() && 10582 !HasEmittedDeclareTargetRegion && 10583 !HasEmittedTargetRegion)) 10584 return nullptr; 10585 10586 // Create and register the function that handles the requires directives. 10587 ASTContext &C = CGM.getContext(); 10588 10589 llvm::Function *RequiresRegFn; 10590 { 10591 CodeGenFunction CGF(CGM); 10592 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 10593 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 10594 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 10595 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 10596 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 10597 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 10598 // TODO: check for other requires clauses. 10599 // The requires directive takes effect only when a target region is 10600 // present in the compilation unit. Otherwise it is ignored and not 10601 // passed to the runtime. This avoids the runtime from throwing an error 10602 // for mismatching requires clauses across compilation units that don't 10603 // contain at least 1 target region. 
10604 assert((HasEmittedTargetRegion || 10605 HasEmittedDeclareTargetRegion || 10606 !OffloadEntriesInfoManager.empty()) && 10607 "Target or declare target region expected."); 10608 if (HasRequiresUnifiedSharedMemory) 10609 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 10610 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 10611 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 10612 CGF.FinishFunction(); 10613 } 10614 return RequiresRegFn; 10615 } 10616 10617 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 10618 const OMPExecutableDirective &D, 10619 SourceLocation Loc, 10620 llvm::Function *OutlinedFn, 10621 ArrayRef<llvm::Value *> CapturedVars) { 10622 if (!CGF.HaveInsertPoint()) 10623 return; 10624 10625 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10626 CodeGenFunction::RunCleanupsScope Scope(CGF); 10627 10628 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 10629 llvm::Value *Args[] = { 10630 RTLoc, 10631 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 10632 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 10633 llvm::SmallVector<llvm::Value *, 16> RealArgs; 10634 RealArgs.append(std::begin(Args), std::end(Args)); 10635 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 10636 10637 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 10638 CGF.EmitRuntimeCall(RTLFn, RealArgs); 10639 } 10640 10641 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 10642 const Expr *NumTeams, 10643 const Expr *ThreadLimit, 10644 SourceLocation Loc) { 10645 if (!CGF.HaveInsertPoint()) 10646 return; 10647 10648 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 10649 10650 llvm::Value *NumTeamsVal = 10651 NumTeams 10652 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 10653 CGF.CGM.Int32Ty, /* isSigned = */ true) 10654 : CGF.Builder.getInt32(0); 10655 10656 llvm::Value *ThreadLimitVal = 10657 ThreadLimit 10658 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 10659 CGF.CGM.Int32Ty, /* isSigned = */ true) 10660 : CGF.Builder.getInt32(0); 10661 10662 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 10663 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 10664 ThreadLimitVal}; 10665 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 10666 PushNumTeamsArgs); 10667 } 10668 10669 void CGOpenMPRuntime::emitTargetDataCalls( 10670 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10671 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 10672 if (!CGF.HaveInsertPoint()) 10673 return; 10674 10675 // Action used to replace the default codegen action and turn privatization 10676 // off. 10677 PrePostActionTy NoPrivAction; 10678 10679 // Generate the code for the opening of the data environment. Capture all the 10680 // arguments of the runtime call by reference because they are used in the 10681 // closing of the region. 10682 auto &&BeginThenGen = [this, &D, Device, &Info, 10683 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 10684 // Fill up the arrays with all the mapped variables. 10685 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10686 MappableExprsHandler::MapValuesArrayTy Pointers; 10687 MappableExprsHandler::MapValuesArrayTy Sizes; 10688 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10689 10690 // Get map clause information. 10691 MappableExprsHandler MCHandler(D, CGF); 10692 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10693 10694 // Fill up the arrays and create the arguments. 
10695 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10696 10697 llvm::Value *BasePointersArrayArg = nullptr; 10698 llvm::Value *PointersArrayArg = nullptr; 10699 llvm::Value *SizesArrayArg = nullptr; 10700 llvm::Value *MapTypesArrayArg = nullptr; 10701 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10702 SizesArrayArg, MapTypesArrayArg, Info); 10703 10704 // Emit device ID if any. 10705 llvm::Value *DeviceID = nullptr; 10706 if (Device) { 10707 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10708 CGF.Int64Ty, /*isSigned=*/true); 10709 } else { 10710 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10711 } 10712 10713 // Emit the number of elements in the offloading arrays. 10714 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10715 10716 llvm::Value *OffloadingArgs[] = { 10717 DeviceID, PointerNum, BasePointersArrayArg, 10718 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10719 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10720 OffloadingArgs); 10721 10722 // If device pointer privatization is required, emit the body of the region 10723 // here. It will have to be duplicated: with and without privatization. 10724 if (!Info.CaptureDeviceAddrMap.empty()) 10725 CodeGen(CGF); 10726 }; 10727 10728 // Generate code for the closing of the data region. 10729 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10730 PrePostActionTy &) { 10731 assert(Info.isValid() && "Invalid data environment closing arguments."); 10732 10733 llvm::Value *BasePointersArrayArg = nullptr; 10734 llvm::Value *PointersArrayArg = nullptr; 10735 llvm::Value *SizesArrayArg = nullptr; 10736 llvm::Value *MapTypesArrayArg = nullptr; 10737 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10738 SizesArrayArg, MapTypesArrayArg, Info); 10739 10740 // Emit device ID if any. 
10741 llvm::Value *DeviceID = nullptr; 10742 if (Device) { 10743 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10744 CGF.Int64Ty, /*isSigned=*/true); 10745 } else { 10746 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10747 } 10748 10749 // Emit the number of elements in the offloading arrays. 10750 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10751 10752 llvm::Value *OffloadingArgs[] = { 10753 DeviceID, PointerNum, BasePointersArrayArg, 10754 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10755 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10756 OffloadingArgs); 10757 }; 10758 10759 // If we need device pointer privatization, we need to emit the body of the 10760 // region with no privatization in the 'else' branch of the conditional. 10761 // Otherwise, we don't have to do anything. 10762 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10763 PrePostActionTy &) { 10764 if (!Info.CaptureDeviceAddrMap.empty()) { 10765 CodeGen.setAction(NoPrivAction); 10766 CodeGen(CGF); 10767 } 10768 }; 10769 10770 // We don't have to do anything to close the region if the if clause evaluates 10771 // to false. 10772 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10773 10774 if (IfCond) { 10775 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10776 } else { 10777 RegionCodeGenTy RCG(BeginThenGen); 10778 RCG(CGF); 10779 } 10780 10781 // If we don't require privatization of device pointers, we emit the body in 10782 // between the runtime calls. This avoids duplicating the body code. 
10783 if (Info.CaptureDeviceAddrMap.empty()) { 10784 CodeGen.setAction(NoPrivAction); 10785 CodeGen(CGF); 10786 } 10787 10788 if (IfCond) { 10789 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10790 } else { 10791 RegionCodeGenTy RCG(EndThenGen); 10792 RCG(CGF); 10793 } 10794 } 10795 10796 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10797 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10798 const Expr *Device) { 10799 if (!CGF.HaveInsertPoint()) 10800 return; 10801 10802 assert((isa<OMPTargetEnterDataDirective>(D) || 10803 isa<OMPTargetExitDataDirective>(D) || 10804 isa<OMPTargetUpdateDirective>(D)) && 10805 "Expecting either target enter, exit data, or update directives."); 10806 10807 CodeGenFunction::OMPTargetDataInfo InputInfo; 10808 llvm::Value *MapTypesArray = nullptr; 10809 // Generate the code for the opening of the data environment. 10810 auto &&ThenGen = [this, &D, Device, &InputInfo, 10811 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10812 // Emit device ID if any. 10813 llvm::Value *DeviceID = nullptr; 10814 if (Device) { 10815 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10816 CGF.Int64Ty, /*isSigned=*/true); 10817 } else { 10818 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10819 } 10820 10821 // Emit the number of elements in the offloading arrays. 10822 llvm::Constant *PointerNum = 10823 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10824 10825 llvm::Value *OffloadingArgs[] = {DeviceID, 10826 PointerNum, 10827 InputInfo.BasePointersArray.getPointer(), 10828 InputInfo.PointersArray.getPointer(), 10829 InputInfo.SizesArray.getPointer(), 10830 MapTypesArray}; 10831 10832 // Select the right runtime function call for each expected standalone 10833 // directive. 10834 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10835 OpenMPRTLFunction RTLFn; 10836 switch (D.getDirectiveKind()) { 10837 case OMPD_target_enter_data: 10838 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_begin_nowait 10839 : OMPRTL__tgt_target_data_begin; 10840 break; 10841 case OMPD_target_exit_data: 10842 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10843 : OMPRTL__tgt_target_data_end; 10844 break; 10845 case OMPD_target_update: 10846 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10847 : OMPRTL__tgt_target_data_update; 10848 break; 10849 case OMPD_parallel: 10850 case OMPD_for: 10851 case OMPD_parallel_for: 10852 case OMPD_parallel_master: 10853 case OMPD_parallel_sections: 10854 case OMPD_for_simd: 10855 case OMPD_parallel_for_simd: 10856 case OMPD_cancel: 10857 case OMPD_cancellation_point: 10858 case OMPD_ordered: 10859 case OMPD_threadprivate: 10860 case OMPD_allocate: 10861 case OMPD_task: 10862 case OMPD_simd: 10863 case OMPD_sections: 10864 case OMPD_section: 10865 case OMPD_single: 10866 case OMPD_master: 10867 case OMPD_critical: 10868 case OMPD_taskyield: 10869 case OMPD_barrier: 10870 case OMPD_taskwait: 10871 case OMPD_taskgroup: 10872 case OMPD_atomic: 10873 case OMPD_flush: 10874 case OMPD_depobj: 10875 case OMPD_scan: 10876 case OMPD_teams: 10877 case OMPD_target_data: 10878 case OMPD_distribute: 10879 case OMPD_distribute_simd: 10880 case OMPD_distribute_parallel_for: 10881 case OMPD_distribute_parallel_for_simd: 10882 case OMPD_teams_distribute: 10883 case OMPD_teams_distribute_simd: 10884 case OMPD_teams_distribute_parallel_for: 10885 case OMPD_teams_distribute_parallel_for_simd: 10886 case OMPD_declare_simd: 10887 case OMPD_declare_variant: 10888 case OMPD_begin_declare_variant: 10889 case OMPD_end_declare_variant: 10890 case OMPD_declare_target: 10891 case OMPD_end_declare_target: 10892 case OMPD_declare_reduction: 10893 case OMPD_declare_mapper: 10894 case OMPD_taskloop: 10895 case OMPD_taskloop_simd: 10896 case OMPD_master_taskloop: 10897 case OMPD_master_taskloop_simd: 10898 case OMPD_parallel_master_taskloop: 10899 case OMPD_parallel_master_taskloop_simd: 10900 case OMPD_target: 10901 case 
OMPD_target_simd: 10902 case OMPD_target_teams_distribute: 10903 case OMPD_target_teams_distribute_simd: 10904 case OMPD_target_teams_distribute_parallel_for: 10905 case OMPD_target_teams_distribute_parallel_for_simd: 10906 case OMPD_target_teams: 10907 case OMPD_target_parallel: 10908 case OMPD_target_parallel_for: 10909 case OMPD_target_parallel_for_simd: 10910 case OMPD_requires: 10911 case OMPD_unknown: 10912 llvm_unreachable("Unexpected standalone target data directive."); 10913 break; 10914 } 10915 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10916 }; 10917 10918 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10919 CodeGenFunction &CGF, PrePostActionTy &) { 10920 // Fill up the arrays with all the mapped variables. 10921 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10922 MappableExprsHandler::MapValuesArrayTy Pointers; 10923 MappableExprsHandler::MapValuesArrayTy Sizes; 10924 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10925 10926 // Get map clause information. 10927 MappableExprsHandler MEHandler(D, CGF); 10928 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10929 10930 TargetDataInfo Info; 10931 // Fill up the arrays and create the arguments. 
    // Materialize the offloading argument arrays (base pointers, pointers,
    // sizes, map types) and record them for the task-based or inlined
    // emission below.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    // With a depend clause the region is emitted through the target-task
    // machinery; otherwise it is emitted inline.
    if (D.hasClausesOfKind<OMPDependClause>())
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  // Classification of the parameter; 'vector' is the default per the spec.
  ParamKindTy Kind = Vector;
  // Linear step value, or (for LinearWithVarStride) the position of the
  // stride parameter in the function declaration.
  llvm::APSInt StrideOrArg;
  // Alignment from the 'aligned' clause; zero when not specified.
  llvm::APSInt Alignment;
};
} // namespace

/// Compute the characteristic data type (CDT) size, in bits, used to derive
/// the vector length (VLEN) of a 'declare simd' variant when no simdlen
/// clause was given. Returns 0 if the return type is unavailable.
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If OpenMP clause "simdlen" is used, the VLEN is the value of the
  // argument of that clause. The VLEN value must be power of 2.
  // In other case the notion of the function's "characteristic data type"
  // (CDT) is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN  = sizeof(vector_register) / sizeof(CDT),
  // where vector register size specified in section 3.2.1 Registers and the
  // Stack Frame of original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    // Case a): non-void return type is the CDT.
    CDT = RetType;
  } else {
    // Case b): first 'vector' (non-uniform, non-linear) parameter.
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      // Instance methods have an implicit leading 'this' slot in ParamAttrs.
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  // Cases c) and d): fall back to 'int'.
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Emit the x86 'declare simd' vector-variant mangled names
/// ("_ZGV<isa><mask><vlen><parameters>_<name>") as attributes on \p Fn,
/// one per (mask, ISA) combination.
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  // ISA letter and vector-register width (bits) for each x86 SIMD level.
  ISADataTy ISAData[] = {
      {
          'b', 128
      }, // SSE
      {
          'c', 256
      }, // AVX
      {
          'd', 256
      }, // AVX2
      {
          'e', 512
      }, // AVX512
  };
  // 'N' = unmasked variant, 'M' = masked variant; without an explicit
  // [not]inbranch clause both are emitted.
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        // No simdlen clause: derive VLEN from the register width and the
        // characteristic data type size.
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      // Append one mangling token per parameter.
      for (const ParamAttrTy &ParamAttr : ParamAttrs) {
        switch (ParamAttr.Kind){
        case LinearWithVarStride:
          Out << 's' << ParamAttr.StrideOrArg;
          break;
        case Linear:
          Out << 'l';
          if (ParamAttr.StrideOrArg != 1)
            Out << ParamAttr.StrideOrArg;
          break;
        case Uniform:
          Out << 'u';
          break;
        case Vector:
          Out << 'v';
          break;
        }
        if (!!ParamAttr.Alignment)
          Out << 'a' << ParamAttr.Alignment;
      }
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64"
// (AAVFABI), available on the Arm developer website.
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
///
/// TODO: Need to implement the behavior for reference marked with a
/// var or no linear modifiers (1.b in the section). For this, we
/// need to extend ParamKindTy to support the linear modifiers.
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  // Uniform and linear parameters are not widened to vectors.
  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::Linear)
    return false;

  // TODO: Handle linear references with modifiers

  if (Kind == ParamKindTy::LinearWithVarStride)
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex within 16 bytes wide set PBV to true.
  // (Size here is in bits.)
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  // For pointers that are not mapped to vectors, the lane size is that of
  // the pointee when the pointee itself is pass-by-value.
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  // Fallback: size of a pointer-sized unsigned integer.
  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI. The third tuple element reports whether the return value
// must be treated as an additional input ("output becomes input").
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    // A non-PBV return that maps to a vector is appended to the parameter
    // mangling as an extra 'v' token (see addAArch64VectorName).
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(std::all_of(Sizes.begin(), Sizes.end(),
                     [](unsigned Size) {
                       return Size == 8 || Size == 16 || Size == 32 ||
                              Size == 64 || Size == 128;
                     }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 3.5 of the AAVFABI.
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case LinearWithVarStride:
      Out << "ls" << ParamAttr.StrideOrArg;
      break;
    case Linear:
      Out << 'l';
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
11238 template <typename T> 11239 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 11240 char ISA, StringRef ParSeq, 11241 StringRef MangledName, bool OutputBecomesInput, 11242 llvm::Function *Fn) { 11243 SmallString<256> Buffer; 11244 llvm::raw_svector_ostream Out(Buffer); 11245 Out << Prefix << ISA << LMask << VLEN; 11246 if (OutputBecomesInput) 11247 Out << "v"; 11248 Out << ParSeq << "_" << MangledName; 11249 Fn->addFnAttr(Out.str()); 11250 } 11251 11252 // Helper function to generate the Advanced SIMD names depending on 11253 // the value of the NDS when simdlen is not present. 11254 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 11255 StringRef Prefix, char ISA, 11256 StringRef ParSeq, StringRef MangledName, 11257 bool OutputBecomesInput, 11258 llvm::Function *Fn) { 11259 switch (NDS) { 11260 case 8: 11261 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11262 OutputBecomesInput, Fn); 11263 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 11264 OutputBecomesInput, Fn); 11265 break; 11266 case 16: 11267 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11268 OutputBecomesInput, Fn); 11269 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 11270 OutputBecomesInput, Fn); 11271 break; 11272 case 32: 11273 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11274 OutputBecomesInput, Fn); 11275 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 11276 OutputBecomesInput, Fn); 11277 break; 11278 case 64: 11279 case 128: 11280 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 11281 OutputBecomesInput, Fn); 11282 break; 11283 default: 11284 llvm_unreachable("Scalar type is too wide."); 11285 } 11286 } 11287 11288 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1: scalable output uses "x" as VLEN.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  // Attributes may live on any redeclaration; start from the most recent one.
  FD = FD->getMostRecentDecl();
  // Map params to their positions in function decl.
  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
  // For instance methods position 0 is reserved for the implicit 'this'
  // (keyed by the function decl itself).
  if (isa<CXXMethodDecl>(FD))
    ParamPositions.try_emplace(FD, 0);
  unsigned ParamPos = ParamPositions.size();
  for (const ParmVarDecl *P : FD->parameters()) {
    ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
    ++ParamPos;
  }
  // Walk the whole redeclaration chain; each 'declare simd' attribute
  // contributes its own set of vector variants.
  while (FD) {
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          Pos = ParamPositions[PVD];
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
11427 auto NI = Attr->alignments_begin(); 11428 for (const Expr *E : Attr->aligneds()) { 11429 E = E->IgnoreParenImpCasts(); 11430 unsigned Pos; 11431 QualType ParmTy; 11432 if (isa<CXXThisExpr>(E)) { 11433 Pos = ParamPositions[FD]; 11434 ParmTy = E->getType(); 11435 } else { 11436 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11437 ->getCanonicalDecl(); 11438 Pos = ParamPositions[PVD]; 11439 ParmTy = PVD->getType(); 11440 } 11441 ParamAttrs[Pos].Alignment = 11442 (*NI) 11443 ? (*NI)->EvaluateKnownConstInt(C) 11444 : llvm::APSInt::getUnsigned( 11445 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 11446 .getQuantity()); 11447 ++NI; 11448 } 11449 // Mark linear parameters. 11450 auto SI = Attr->steps_begin(); 11451 auto MI = Attr->modifiers_begin(); 11452 for (const Expr *E : Attr->linears()) { 11453 E = E->IgnoreParenImpCasts(); 11454 unsigned Pos; 11455 // Rescaling factor needed to compute the linear parameter 11456 // value in the mangled name. 11457 unsigned PtrRescalingFactor = 1; 11458 if (isa<CXXThisExpr>(E)) { 11459 Pos = ParamPositions[FD]; 11460 } else { 11461 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 11462 ->getCanonicalDecl(); 11463 Pos = ParamPositions[PVD]; 11464 if (auto *P = dyn_cast<PointerType>(PVD->getType())) 11465 PtrRescalingFactor = CGM.getContext() 11466 .getTypeSizeInChars(P->getPointeeType()) 11467 .getQuantity(); 11468 } 11469 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 11470 ParamAttr.Kind = Linear; 11471 // Assuming a stride of 1, for `linear` without modifiers. 
11472 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); 11473 if (*SI) { 11474 Expr::EvalResult Result; 11475 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 11476 if (const auto *DRE = 11477 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 11478 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 11479 ParamAttr.Kind = LinearWithVarStride; 11480 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 11481 ParamPositions[StridePVD->getCanonicalDecl()]); 11482 } 11483 } 11484 } else { 11485 ParamAttr.StrideOrArg = Result.Val.getInt(); 11486 } 11487 } 11488 // If we are using a linear clause on a pointer, we need to 11489 // rescale the value of linear_step with the byte size of the 11490 // pointee type. 11491 if (Linear == ParamAttr.Kind) 11492 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; 11493 ++SI; 11494 ++MI; 11495 } 11496 llvm::APSInt VLENVal; 11497 SourceLocation ExprLoc; 11498 const Expr *VLENExpr = Attr->getSimdlen(); 11499 if (VLENExpr) { 11500 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 11501 ExprLoc = VLENExpr->getExprLoc(); 11502 } 11503 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 11504 if (CGM.getTriple().isX86()) { 11505 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 11506 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 11507 unsigned VLEN = VLENVal.getExtValue(); 11508 StringRef MangledName = Fn->getName(); 11509 if (CGM.getTarget().hasFeature("sve")) 11510 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11511 MangledName, 's', 128, Fn, ExprLoc); 11512 if (CGM.getTarget().hasFeature("neon")) 11513 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 11514 MangledName, 'n', 128, Fn, ExprLoc); 11515 } 11516 } 11517 FD = FD->getPreviousDecl(); 11518 } 11519 } 11520 11521 namespace { 11522 /// Cleanup action for doacross support. 
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  // Number of arguments of the __kmpc_doacross_fini call (loc, gtid).
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  // Emit the finalization call when the cleanup scope is exited.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build and cache the kmp_dim record type (one per module).
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // One kmp_dim entry per loop dimension, zero-initialized (lo stays 0).
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register a cleanup so __kmpc_doacross_fini runs on scope exit (normal
  // and EH paths).
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn =
      createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  // Store the loop counters (converted to kmp_int64) into a temporary array.
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  // 'source' posts the iteration, 'sink' waits for it.
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  // Use the cheaper nounwind call form when the callee is known not to throw.
  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void
CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  // Track whether any 'declare target' function body has been emitted.
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  // Host default: native and target parameters share the same local address.
  return CGF.GetAddrOfLocalVar(NativeParam);
}

namespace {
/// Cleanup action for allocate support.
class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
public:
  // Number of arguments of the __kmpc_free call (gtid, ptr, allocator).
  static const int CleanupArgs = 3;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[CleanupArgs];

public:
  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                       ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == CleanupArgs &&
           "Size of arguments does not match.");
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  // Emit the deallocation call when the cleanup scope is exited.
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return Address::invalid();
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
      !AA->getAllocator())
    return Address::invalid();
  llvm::Value *Size;
  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
  if (CVD->getType()->isVariablyModifiedType()) {
    // VLA-like types: size is a runtime value; round it up to the alignment.
    Size = CGF.getTypeSize(CVD->getType());
    // Align the size: ((size + align - 1) / align) * align
    Size = CGF.Builder.CreateNUWAdd(
        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
  } else {
    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
    Size = CGM.getSize(Sz.alignTo(Align));
  }
  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
  assert(AA->getAllocator() &&
         "Expected allocator expression for non-default allocator.");
  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
  // According to the standard, the original allocator type is an enum
  // (integer). Convert to pointer type, if required.
  if (Allocator->getType()->isIntegerTy())
    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
  else if (Allocator->getType()->isPointerTy())
    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
                                                                CGM.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  // Allocate through the runtime and schedule the matching __kmpc_free.
  llvm::Value *Addr =
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args,
                          getName({CVD->getName(), ".void.addr"}));
  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
                                                              Allocator};
  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);

  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                                llvm::makeArrayRef(FiniArgs));
  // Cast the raw void* result to a pointer to the variable's type.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr,
      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
      getName({CVD->getName(), ".addr"}));
  return Address(Addr, Align);
}

// Pushes the set of nontemporal decls for the duration of a loop directive
// with 'nontemporal' clauses; popped by the destructor.
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise the reference must be a member of the current class.
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD =
            ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  // A decl is nontemporal if any active scope on the stack marked it so.
  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
}

// Collects, into \p NeedToAddForLPCsAsDisabled, the decls for which
// lastprivate-conditional analysis must be disabled inside directive \p S
// (because the directive privatizes them or captures them in a task/target
// region).
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Same for firstprivate clauses.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Same for lastprivate clauses.
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Same for reduction clauses.
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // Same for linear clauses.
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  // A collected decl needs disabling only if an enclosing (innermost first)
  // lastprivate-conditional region actually tracks it.
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

// Push action: registers the lastprivate-conditional vars of directive \p S
// (OpenMP >= 5.0 only) together with the IV lvalue used for max-iteration
// tracking.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Each tracked decl gets a unique name for its conditional storage.
    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

// Disable action: pushes a 'Disabled' entry for decls whose analysis must be
// suppressed within \p S (see tryToDisableInnerAnalysis).
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  // Pop whichever kind of entry this RAII pushed; the asserts cross-check
  // that pushes and pops stay balanced per kind.
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache of the implicit record types built for
  // lastprivate-conditional variables.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField =
addFieldToRecordDecl(C, RD, C.CharTy); 11970 RD->completeDefinition(); 11971 NewType = C.getRecordType(RD); 11972 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName()); 11973 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl); 11974 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal); 11975 } else { 11976 NewType = std::get<0>(VI->getSecond()); 11977 VDField = std::get<1>(VI->getSecond()); 11978 FiredField = std::get<2>(VI->getSecond()); 11979 BaseLVal = std::get<3>(VI->getSecond()); 11980 } 11981 LValue FiredLVal = 11982 CGF.EmitLValueForField(BaseLVal, FiredField); 11983 CGF.EmitStoreOfScalar( 11984 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), 11985 FiredLVal); 11986 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); 11987 } 11988 11989 namespace { 11990 /// Checks if the lastprivate conditional variable is referenced in LHS. 11991 class LastprivateConditionalRefChecker final 11992 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> { 11993 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM; 11994 const Expr *FoundE = nullptr; 11995 const Decl *FoundD = nullptr; 11996 StringRef UniqueDeclName; 11997 LValue IVLVal; 11998 llvm::Function *FoundFn = nullptr; 11999 SourceLocation Loc; 12000 12001 public: 12002 bool VisitDeclRefExpr(const DeclRefExpr *E) { 12003 for (const CGOpenMPRuntime::LastprivateConditionalData &D : 12004 llvm::reverse(LPM)) { 12005 auto It = D.DeclToUniqueName.find(E->getDecl()); 12006 if (It == D.DeclToUniqueName.end()) 12007 continue; 12008 if (D.Disabled) 12009 return false; 12010 FoundE = E; 12011 FoundD = E->getDecl()->getCanonicalDecl(); 12012 UniqueDeclName = It->second; 12013 IVLVal = D.IVLVal; 12014 FoundFn = D.Fn; 12015 break; 12016 } 12017 return FoundE == E; 12018 } 12019 bool VisitMemberExpr(const MemberExpr *E) { 12020 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase())) 12021 return false; 12022 for (const 
CGOpenMPRuntime::LastprivateConditionalData &D : 12023 llvm::reverse(LPM)) { 12024 auto It = D.DeclToUniqueName.find(E->getMemberDecl()); 12025 if (It == D.DeclToUniqueName.end()) 12026 continue; 12027 if (D.Disabled) 12028 return false; 12029 FoundE = E; 12030 FoundD = E->getMemberDecl()->getCanonicalDecl(); 12031 UniqueDeclName = It->second; 12032 IVLVal = D.IVLVal; 12033 FoundFn = D.Fn; 12034 break; 12035 } 12036 return FoundE == E; 12037 } 12038 bool VisitStmt(const Stmt *S) { 12039 for (const Stmt *Child : S->children()) { 12040 if (!Child) 12041 continue; 12042 if (const auto *E = dyn_cast<Expr>(Child)) 12043 if (!E->isGLValue()) 12044 continue; 12045 if (Visit(Child)) 12046 return true; 12047 } 12048 return false; 12049 } 12050 explicit LastprivateConditionalRefChecker( 12051 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM) 12052 : LPM(LPM) {} 12053 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *> 12054 getFoundData() const { 12055 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn); 12056 } 12057 }; 12058 } // namespace 12059 12060 void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, 12061 LValue IVLVal, 12062 StringRef UniqueDeclName, 12063 LValue LVal, 12064 SourceLocation Loc) { 12065 // Last updated loop counter for the lastprivate conditional var. 12066 // int<xx> last_iv = 0; 12067 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); 12068 llvm::Constant *LastIV = 12069 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); 12070 cast<llvm::GlobalVariable>(LastIV)->setAlignment( 12071 IVLVal.getAlignment().getAsAlign()); 12072 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); 12073 12074 // Last value of the lastprivate conditional. 
12075 // decltype(priv_a) last_a; 12076 llvm::Constant *Last = getOrCreateInternalVariable( 12077 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); 12078 cast<llvm::GlobalVariable>(Last)->setAlignment( 12079 LVal.getAlignment().getAsAlign()); 12080 LValue LastLVal = 12081 CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); 12082 12083 // Global loop counter. Required to handle inner parallel-for regions. 12084 // iv 12085 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc); 12086 12087 // #pragma omp critical(a) 12088 // if (last_iv <= iv) { 12089 // last_iv = iv; 12090 // last_a = priv_a; 12091 // } 12092 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal, 12093 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 12094 Action.Enter(CGF); 12095 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc); 12096 // (last_iv <= iv) ? Check if the variable is updated and store new 12097 // value in global var. 12098 llvm::Value *CmpRes; 12099 if (IVLVal.getType()->isSignedIntegerType()) { 12100 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal); 12101 } else { 12102 assert(IVLVal.getType()->isUnsignedIntegerType() && 12103 "Loop iteration variable must be integer."); 12104 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal); 12105 } 12106 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then"); 12107 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit"); 12108 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB); 12109 // { 12110 CGF.EmitBlock(ThenBB); 12111 12112 // last_iv = iv; 12113 CGF.EmitStoreOfScalar(IVVal, LastIVLVal); 12114 12115 // last_a = priv_a; 12116 switch (CGF.getEvaluationKind(LVal.getType())) { 12117 case TEK_Scalar: { 12118 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc); 12119 CGF.EmitStoreOfScalar(PrivVal, LastLVal); 12120 break; 12121 } 12122 case TEK_Complex: { 12123 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc); 12124 CGF.EmitStoreOfComplex(PrivVal, 
LastLVal, /*isInit=*/false); 12125 break; 12126 } 12127 case TEK_Aggregate: 12128 llvm_unreachable( 12129 "Aggregates are not supported in lastprivate conditional."); 12130 } 12131 // } 12132 CGF.EmitBranch(ExitBB); 12133 // There is no need to emit line number for unconditional branch. 12134 (void)ApplyDebugLocation::CreateEmpty(CGF); 12135 CGF.EmitBlock(ExitBB, /*IsFinished=*/true); 12136 }; 12137 12138 if (CGM.getLangOpts().OpenMPSimd) { 12139 // Do not emit as a critical region as no parallel region could be emitted. 12140 RegionCodeGenTy ThenRCG(CodeGen); 12141 ThenRCG(CGF); 12142 } else { 12143 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc); 12144 } 12145 } 12146 12147 void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, 12148 const Expr *LHS) { 12149 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12150 return; 12151 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack); 12152 if (!Checker.Visit(LHS)) 12153 return; 12154 const Expr *FoundE; 12155 const Decl *FoundD; 12156 StringRef UniqueDeclName; 12157 LValue IVLVal; 12158 llvm::Function *FoundFn; 12159 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) = 12160 Checker.getFoundData(); 12161 if (FoundFn != CGF.CurFn) { 12162 // Special codegen for inner parallel regions. 
12163 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1; 12164 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD); 12165 assert(It != LastprivateConditionalToTypes[FoundFn].end() && 12166 "Lastprivate conditional is not found in outer region."); 12167 QualType StructTy = std::get<0>(It->getSecond()); 12168 const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); 12169 LValue PrivLVal = CGF.EmitLValue(FoundE); 12170 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 12171 PrivLVal.getAddress(CGF), 12172 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); 12173 LValue BaseLVal = 12174 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); 12175 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); 12176 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get( 12177 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)), 12178 FiredLVal, llvm::AtomicOrdering::Unordered, 12179 /*IsVolatile=*/true, /*isInit=*/false); 12180 return; 12181 } 12182 12183 // Private address of the lastprivate conditional in the current context. 
12184 // priv_a 12185 LValue LVal = CGF.EmitLValue(FoundE); 12186 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal, 12187 FoundE->getExprLoc()); 12188 } 12189 12190 void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( 12191 CodeGenFunction &CGF, const OMPExecutableDirective &D, 12192 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) { 12193 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty()) 12194 return; 12195 auto Range = llvm::reverse(LastprivateConditionalStack); 12196 auto It = llvm::find_if( 12197 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; }); 12198 if (It == Range.end() || It->Fn != CGF.CurFn) 12199 return; 12200 auto LPCI = LastprivateConditionalToTypes.find(It->Fn); 12201 assert(LPCI != LastprivateConditionalToTypes.end() && 12202 "Lastprivates must be registered already."); 12203 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; 12204 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); 12205 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); 12206 for (const auto &Pair : It->DeclToUniqueName) { 12207 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); 12208 if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) 12209 continue; 12210 auto I = LPCI->getSecond().find(Pair.first); 12211 assert(I != LPCI->getSecond().end() && 12212 "Lastprivate must be rehistered already."); 12213 // bool Cmp = priv_a.Fired != 0; 12214 LValue BaseLVal = std::get<3>(I->getSecond()); 12215 LValue FiredLVal = 12216 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond())); 12217 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc()); 12218 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res); 12219 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then"); 12220 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done"); 12221 // if (Cmp) { 12222 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB); 12223 CGF.EmitBlock(ThenBB); 
12224 Address Addr = CGF.GetAddrOfLocalVar(VD); 12225 LValue LVal; 12226 if (VD->getType()->isReferenceType()) 12227 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(), 12228 AlignmentSource::Decl); 12229 else 12230 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(), 12231 AlignmentSource::Decl); 12232 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal, 12233 D.getBeginLoc()); 12234 auto AL = ApplyDebugLocation::CreateArtificial(CGF); 12235 CGF.EmitBlock(DoneBB, /*IsFinal=*/true); 12236 // } 12237 } 12238 } 12239 12240 void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( 12241 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD, 12242 SourceLocation Loc) { 12243 if (CGF.getLangOpts().OpenMP < 50) 12244 return; 12245 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD); 12246 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() && 12247 "Unknown lastprivate conditional variable."); 12248 StringRef UniqueName = It->second; 12249 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName); 12250 // The variable was not updated in the region - exit. 
12251 if (!GV) 12252 return; 12253 LValue LPLVal = CGF.MakeAddrLValue( 12254 GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); 12255 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); 12256 CGF.EmitStoreOfScalar(Res, PrivLVal); 12257 } 12258 12259 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 12260 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12261 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12262 llvm_unreachable("Not supported in SIMD-only mode"); 12263 } 12264 12265 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 12266 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12267 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 12268 llvm_unreachable("Not supported in SIMD-only mode"); 12269 } 12270 12271 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 12272 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 12273 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 12274 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 12275 bool Tied, unsigned &NumberOfParts) { 12276 llvm_unreachable("Not supported in SIMD-only mode"); 12277 } 12278 12279 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 12280 SourceLocation Loc, 12281 llvm::Function *OutlinedFn, 12282 ArrayRef<llvm::Value *> CapturedVars, 12283 const Expr *IfCond) { 12284 llvm_unreachable("Not supported in SIMD-only mode"); 12285 } 12286 12287 void CGOpenMPSIMDRuntime::emitCriticalRegion( 12288 CodeGenFunction &CGF, StringRef CriticalName, 12289 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 12290 const Expr *Hint) { 12291 llvm_unreachable("Not supported in SIMD-only mode"); 12292 } 12293 12294 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 12295 const RegionCodeGenTy &MasterOpGen, 12296 SourceLocation Loc) { 12297 llvm_unreachable("Not supported in SIMD-only mode"); 12298 } 12299 12300 void 
CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 12301 SourceLocation Loc) { 12302 llvm_unreachable("Not supported in SIMD-only mode"); 12303 } 12304 12305 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 12306 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 12307 SourceLocation Loc) { 12308 llvm_unreachable("Not supported in SIMD-only mode"); 12309 } 12310 12311 void CGOpenMPSIMDRuntime::emitSingleRegion( 12312 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 12313 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 12314 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 12315 ArrayRef<const Expr *> AssignmentOps) { 12316 llvm_unreachable("Not supported in SIMD-only mode"); 12317 } 12318 12319 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 12320 const RegionCodeGenTy &OrderedOpGen, 12321 SourceLocation Loc, 12322 bool IsThreads) { 12323 llvm_unreachable("Not supported in SIMD-only mode"); 12324 } 12325 12326 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 12327 SourceLocation Loc, 12328 OpenMPDirectiveKind Kind, 12329 bool EmitChecks, 12330 bool ForceSimpleCall) { 12331 llvm_unreachable("Not supported in SIMD-only mode"); 12332 } 12333 12334 void CGOpenMPSIMDRuntime::emitForDispatchInit( 12335 CodeGenFunction &CGF, SourceLocation Loc, 12336 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 12337 bool Ordered, const DispatchRTInput &DispatchValues) { 12338 llvm_unreachable("Not supported in SIMD-only mode"); 12339 } 12340 12341 void CGOpenMPSIMDRuntime::emitForStaticInit( 12342 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 12343 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 12344 llvm_unreachable("Not supported in SIMD-only mode"); 12345 } 12346 12347 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 12348 CodeGenFunction &CGF, SourceLocation Loc, 12349 OpenMPDistScheduleClauseKind SchedKind, const 
StaticRTInput &Values) { 12350 llvm_unreachable("Not supported in SIMD-only mode"); 12351 } 12352 12353 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 12354 SourceLocation Loc, 12355 unsigned IVSize, 12356 bool IVSigned) { 12357 llvm_unreachable("Not supported in SIMD-only mode"); 12358 } 12359 12360 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 12361 SourceLocation Loc, 12362 OpenMPDirectiveKind DKind) { 12363 llvm_unreachable("Not supported in SIMD-only mode"); 12364 } 12365 12366 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 12367 SourceLocation Loc, 12368 unsigned IVSize, bool IVSigned, 12369 Address IL, Address LB, 12370 Address UB, Address ST) { 12371 llvm_unreachable("Not supported in SIMD-only mode"); 12372 } 12373 12374 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 12375 llvm::Value *NumThreads, 12376 SourceLocation Loc) { 12377 llvm_unreachable("Not supported in SIMD-only mode"); 12378 } 12379 12380 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 12381 ProcBindKind ProcBind, 12382 SourceLocation Loc) { 12383 llvm_unreachable("Not supported in SIMD-only mode"); 12384 } 12385 12386 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 12387 const VarDecl *VD, 12388 Address VDAddr, 12389 SourceLocation Loc) { 12390 llvm_unreachable("Not supported in SIMD-only mode"); 12391 } 12392 12393 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 12394 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 12395 CodeGenFunction *CGF) { 12396 llvm_unreachable("Not supported in SIMD-only mode"); 12397 } 12398 12399 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 12400 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 12401 llvm_unreachable("Not supported in SIMD-only mode"); 12402 } 12403 12404 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 12405 ArrayRef<const Expr *> 
Vars, 12406 SourceLocation Loc, 12407 llvm::AtomicOrdering AO) { 12408 llvm_unreachable("Not supported in SIMD-only mode"); 12409 } 12410 12411 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 12412 const OMPExecutableDirective &D, 12413 llvm::Function *TaskFunction, 12414 QualType SharedsTy, Address Shareds, 12415 const Expr *IfCond, 12416 const OMPTaskDataTy &Data) { 12417 llvm_unreachable("Not supported in SIMD-only mode"); 12418 } 12419 12420 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 12421 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 12422 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 12423 const Expr *IfCond, const OMPTaskDataTy &Data) { 12424 llvm_unreachable("Not supported in SIMD-only mode"); 12425 } 12426 12427 void CGOpenMPSIMDRuntime::emitReduction( 12428 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 12429 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 12430 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 12431 assert(Options.SimpleReduction && "Only simple reduction is expected."); 12432 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 12433 ReductionOps, Options); 12434 } 12435 12436 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 12437 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 12438 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 12439 llvm_unreachable("Not supported in SIMD-only mode"); 12440 } 12441 12442 void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF, 12443 SourceLocation Loc, 12444 bool IsWorksharingReduction) { 12445 llvm_unreachable("Not supported in SIMD-only mode"); 12446 } 12447 12448 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 12449 SourceLocation Loc, 12450 ReductionCodeGen &RCG, 12451 unsigned N) { 12452 llvm_unreachable("Not supported in SIMD-only mode"); 12453 } 12454 
// Remaining CGOpenMPSIMDRuntime entry points: task reductions, cancellation,
// target offloading, teams, target-data, doacross and parameter translation
// all require the libomp/libomptarget runtime, which -fopenmp-simd excludes,
// so each traps with llvm_unreachable.  The lone exception below is
// emitTargetGlobal, which answers "not handled here" by returning false.

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// Returns false: no offloading-related handling is ever needed in SIMD-only
// mode, so the global is always left to regular codegen.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}