//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "clang/AST/Decl.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/Basic/BitmaskEnum.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>

using namespace clang;
using namespace CodeGen;

namespace {
/// Base class for handling code generation inside OpenMP regions.
///
/// Stores the region kind, the code generation callback, the directive kind
/// and the "has cancel" flag, and exposes them through simple accessors.
/// Every instance reports CR_OpenMP as its captured-region kind, which is what
/// classof() tests for.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Construct region info for a region that is backed by the captured
  /// statement \p CS.
  /// \param RegionKind Kind of the region being described.
  /// \param CodeGen Callback stored for emitting the region.
  /// \param Kind OpenMP directive the region belongs to.
  /// \param HasCancel Value later reported by hasCancel().
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Construct region info without an associated captured statement; the
  /// remaining parameters have the same meaning as in the overload above.
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Hook for emitting an untied-task switching point. The base
  /// implementation intentionally does nothing.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  /// \return The region kind given at construction.
  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  /// \return The OpenMP directive kind given at construction.
  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// \return The HasCancel flag given at construction.
  bool hasCancel() const { return HasCancel; }

  /// LLVM-style RTTI support: any captured-statement info whose kind is
  /// CR_OpenMP is treated as a CGOpenMPRegionInfo.
  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region.
  CGOpenMPRegionKind RegionKind;
  /// Code generation callback for the region.
  RegionCodeGenTy CodeGen;
  /// Directive this region was created for.
  OpenMPDirectiveKind Kind;
  /// Flag reported by hasCancel().
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 
146 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 llvm::Value *Res = 150 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 151 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 153 CGF.EmitBlock(DoneBB); 154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 156 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 157 CGF.Builder.GetInsertBlock()); 158 emitUntiedSwitch(CGF); 159 } 160 } 161 void emitUntiedSwitch(CodeGenFunction &CGF) const { 162 if (Untied) { 163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 167 PartIdLVal); 168 UntiedCodeGen(CGF); 169 CodeGenFunction::JumpDest CurPoint = 170 CGF.getJumpDestInCurrentScope(".untied.next."); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 CGF.Builder.GetInsertBlock()); 175 CGF.EmitBranchThroughCleanup(CurPoint); 176 CGF.EmitBlock(CurPoint.getBlock()); 177 } 178 } 179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 180 }; 181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 182 const VarDecl *ThreadIDVar, 183 const RegionCodeGenTy &CodeGen, 184 OpenMPDirectiveKind Kind, bool HasCancel, 185 const UntiedTaskActionTy &Action) 186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 187 ThreadIDVar(ThreadIDVar), Action(Action) { 188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 189 } 190 191 /// Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 
193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 194 195 /// Get an LValue for the current ThreadID variable. 196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 197 198 /// Get the name of the capture helper. 199 StringRef getHelperName() const override { return ".omp_outlined."; } 200 201 void emitUntiedSwitch(CodeGenFunction &CGF) override { 202 Action.emitUntiedSwitch(CGF); 203 } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 208 TaskOutlinedRegion; 209 } 210 211 private: 212 /// A variable or parameter storing global thread id for OpenMP 213 /// constructs. 214 const VarDecl *ThreadIDVar; 215 /// Action for emitting code for untied tasks. 216 const UntiedTaskActionTy &Action; 217 }; 218 219 /// API for inlined captured statement code generation in OpenMP 220 /// constructs. 221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 222 public: 223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 224 const RegionCodeGenTy &CodeGen, 225 OpenMPDirectiveKind Kind, bool HasCancel) 226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 227 OldCSI(OldCSI), 228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 229 230 // Retrieve the value of the context parameter. 231 llvm::Value *getContextValue() const override { 232 if (OuterRegionInfo) 233 return OuterRegionInfo->getContextValue(); 234 llvm_unreachable("No context value for inlined OpenMP region"); 235 } 236 237 void setContextValue(llvm::Value *V) override { 238 if (OuterRegionInfo) { 239 OuterRegionInfo->setContextValue(V); 240 return; 241 } 242 llvm_unreachable("No context value for inlined OpenMP region"); 243 } 244 245 /// Lookup the captured field decl for a variable. 
246 const FieldDecl *lookup(const VarDecl *VD) const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->lookup(VD); 249 // If there is no outer outlined region,no need to lookup in a list of 250 // captured variables, we can use the original one. 251 return nullptr; 252 } 253 254 FieldDecl *getThisFieldDecl() const override { 255 if (OuterRegionInfo) 256 return OuterRegionInfo->getThisFieldDecl(); 257 return nullptr; 258 } 259 260 /// Get a variable or parameter for storing global thread id 261 /// inside OpenMP construct. 262 const VarDecl *getThreadIDVariable() const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->getThreadIDVariable(); 265 return nullptr; 266 } 267 268 /// Get an LValue for the current ThreadID variable. 269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 272 llvm_unreachable("No LValue for inlined OpenMP construct"); 273 } 274 275 /// Get the name of the capture helper. 276 StringRef getHelperName() const override { 277 if (auto *OuterRegionInfo = getOldCSI()) 278 return OuterRegionInfo->getHelperName(); 279 llvm_unreachable("No helper name for inlined OpenMP construct"); 280 } 281 282 void emitUntiedSwitch(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 OuterRegionInfo->emitUntiedSwitch(CGF); 285 } 286 287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 288 289 static bool classof(const CGCapturedStmtInfo *Info) { 290 return CGOpenMPRegionInfo::classof(Info) && 291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 292 } 293 294 ~CGOpenMPInlinedRegionInfo() override = default; 295 296 private: 297 /// CodeGen info about outer OpenMP region. 298 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 299 CGOpenMPRegionInfo *OuterRegionInfo; 300 }; 301 302 /// API for captured statement code generation in OpenMP target 303 /// constructs. 
For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 
346 for (const auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate( 359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 360 } 361 (void)PrivScope.Privatize(); 362 } 363 364 /// Lookup the captured field decl for a variable. 365 const FieldDecl *lookup(const VarDecl *VD) const override { 366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 367 return FD; 368 return nullptr; 369 } 370 371 /// Emit the captured statement body. 372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 373 llvm_unreachable("No body for expressions"); 374 } 375 376 /// Get a variable or parameter for storing global thread id 377 /// inside OpenMP construct. 378 const VarDecl *getThreadIDVariable() const override { 379 llvm_unreachable("No thread id for expressions"); 380 } 381 382 /// Get the name of the capture helper. 383 StringRef getHelperName() const override { 384 llvm_unreachable("No helper name for expressions"); 385 } 386 387 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 388 389 private: 390 /// Private scope to capture global variables. 391 CodeGenFunction::OMPPrivateScope PrivScope; 392 }; 393 394 /// RAII for emitting code of OpenMP constructs. 395 class InlinedOpenMPRegionRAII { 396 CodeGenFunction &CGF; 397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 398 FieldDecl *LambdaThisCaptureField = nullptr; 399 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 400 401 public: 402 /// Constructs region for combined constructs. 403 /// \param CodeGen Code generation sequence for combined directives. 
Includes 404 /// a list of functions used for code generation of implicitly inlined 405 /// regions. 406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 407 OpenMPDirectiveKind Kind, bool HasCancel) 408 : CGF(CGF) { 409 // Start emission for the construct. 410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 414 CGF.LambdaThisCaptureField = nullptr; 415 BlockInfo = CGF.BlockInfo; 416 CGF.BlockInfo = nullptr; 417 } 418 419 ~InlinedOpenMPRegionRAII() { 420 // Restore original CapturedStmtInfo only if we're done with code emission. 421 auto *OldCSI = 422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 423 delete CGF.CapturedStmtInfo; 424 CGF.CapturedStmtInfo = OldCSI; 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 427 CGF.BlockInfo = BlockInfo; 428 } 429 }; 430 431 /// Values for bit flags used in the ident_t to describe the fields. 432 /// All enumeric elements are named and described in accordance with the code 433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 434 enum OpenMPLocationFlags : unsigned { 435 /// Use trampoline for internal microtask. 436 OMP_IDENT_IMD = 0x01, 437 /// Use c-style ident structure. 438 OMP_IDENT_KMPC = 0x02, 439 /// Atomic reduction option for kmpc_reduce. 440 OMP_ATOMIC_REDUCE = 0x10, 441 /// Explicit 'barrier' directive. 442 OMP_IDENT_BARRIER_EXPL = 0x20, 443 /// Implicit barrier in code. 444 OMP_IDENT_BARRIER_IMPL = 0x40, 445 /// Implicit barrier in 'for' directive. 446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 447 /// Implicit barrier in 'sections' directive. 448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 449 /// Implicit barrier in 'single' directive. 
450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 451 /// Call of __kmp_for_static_init for static loop. 452 OMP_IDENT_WORK_LOOP = 0x200, 453 /// Call of __kmp_for_static_init for sections. 454 OMP_IDENT_WORK_SECTIONS = 0x400, 455 /// Call of __kmp_for_static_init for distribute. 456 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 458 }; 459 460 namespace { 461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 462 /// Values for bit flags for marking which requires clauses have been used. 463 enum OpenMPOffloadingRequiresDirFlags : int64_t { 464 /// flag undefined. 465 OMP_REQ_UNDEFINED = 0x000, 466 /// no requires clause present. 467 OMP_REQ_NONE = 0x001, 468 /// reverse_offload clause. 469 OMP_REQ_REVERSE_OFFLOAD = 0x002, 470 /// unified_address clause. 471 OMP_REQ_UNIFIED_ADDRESS = 0x004, 472 /// unified_shared_memory clause. 473 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 474 /// dynamic_allocators clause. 475 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 476 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 477 }; 478 479 enum OpenMPOffloadingReservedDeviceIDs { 480 /// Device ID if the device was not defined, runtime should get it 481 /// from environment variables in the spec. 482 OMP_DEVICEID_UNDEF = -1, 483 }; 484 } // anonymous namespace 485 486 /// Describes ident structure that describes a source location. 
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below carry no explicit values, so they number the fields
/// 0..4 in the order in which the structure above declares them.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
///
/// Each 'ordered' value below is the corresponding unordered value plus 32,
/// and the two modifier values are single high bits (bits 29 and 30).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule: an alias for plain static scheduling.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the runtime library entry points used by this file. The
/// comment on each enumerator gives the prototype of the runtime function it
/// stands for. Enumerators have no explicit values, so they are numbered
/// consecutively from zero in declaration order.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  // NOTE(review): every sibling __tgt_target_data_* prototype here spells
  // arg_sizes as int64_t *; confirm whether size_t above is intentional.
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
764 class CleanupTy final : public EHScopeStack::Cleanup { 765 PrePostActionTy *Action; 766 767 public: 768 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 769 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 770 if (!CGF.HaveInsertPoint()) 771 return; 772 Action->Exit(CGF); 773 } 774 }; 775 776 } // anonymous namespace 777 778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 779 CodeGenFunction::RunCleanupsScope Scope(CGF); 780 if (PrePostAction) { 781 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 782 Callback(CodeGen, CGF, *PrePostAction); 783 } else { 784 PrePostActionTy Action; 785 Callback(CodeGen, CGF, Action); 786 } 787 } 788 789 /// Check if the combiner is a call to UDR combiner and if it is so return the 790 /// UDR decl used for reduction. 791 static const OMPDeclareReductionDecl * 792 getReductionInit(const Expr *ReductionOp) { 793 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 794 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 795 if (const auto *DRE = 796 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 797 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 798 return DRD; 799 return nullptr; 800 } 801 802 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 803 const OMPDeclareReductionDecl *DRD, 804 const Expr *InitOp, 805 Address Private, Address Original, 806 QualType Ty) { 807 if (DRD->getInitializer()) { 808 std::pair<llvm::Function *, llvm::Function *> Reduction = 809 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 810 const auto *CE = cast<CallExpr>(InitOp); 811 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 812 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 813 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 814 const auto *LHSDRE = 815 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 816 const auto *RHSDRE = 817 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 818 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 819 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 820 [=]() { return Private; }); 821 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 822 [=]() { return Original; }); 823 (void)PrivateScope.Privatize(); 824 RValue Func = RValue::get(Reduction.second); 825 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 826 CGF.EmitIgnoredExpr(InitOp); 827 } else { 828 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 829 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 830 auto *GV = new llvm::GlobalVariable( 831 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 832 llvm::GlobalValue::PrivateLinkage, Init, Name); 833 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 834 RValue InitRVal; 835 switch (CGF.getEvaluationKind(Ty)) { 836 case TEK_Scalar: 837 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 838 break; 839 case TEK_Complex: 840 InitRVal = 841 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 842 break; 843 case TEK_Aggregate: 844 InitRVal = RValue::getAggregate(LV.getAddress()); 845 break; 846 } 847 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 848 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 849 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 850 /*IsInitializer=*/false); 851 } 852 } 853 854 /// Emit initialization of arrays of complex types. 855 /// \param DestAddr Address of the array. 856 /// \param Type Type of array. 857 /// \param Init Initial expression of array. 858 /// \param SrcAddr Address of the original array. 859 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 860 QualType Type, bool EmitDeclareReductionInit, 861 const Expr *Init, 862 const OMPDeclareReductionDecl *DRD, 863 Address SrcAddr = Address::invalid()) { 864 // Perform element-by-element initialization. 
865 QualType ElementTy; 866 867 // Drill down to the base element type on both arrays. 868 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 869 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 870 DestAddr = 871 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 872 if (DRD) 873 SrcAddr = 874 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 875 876 llvm::Value *SrcBegin = nullptr; 877 if (DRD) 878 SrcBegin = SrcAddr.getPointer(); 879 llvm::Value *DestBegin = DestAddr.getPointer(); 880 // Cast from pointer to array type to pointer to single element. 881 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 882 // The basic structure here is a while-do loop. 883 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 884 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 885 llvm::Value *IsEmpty = 886 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 887 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 888 889 // Enter the loop body, making that address the current address. 
890 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 891 CGF.EmitBlock(BodyBB); 892 893 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 894 895 llvm::PHINode *SrcElementPHI = nullptr; 896 Address SrcElementCurrent = Address::invalid(); 897 if (DRD) { 898 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 899 "omp.arraycpy.srcElementPast"); 900 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 901 SrcElementCurrent = 902 Address(SrcElementPHI, 903 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 904 } 905 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 906 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 907 DestElementPHI->addIncoming(DestBegin, EntryBB); 908 Address DestElementCurrent = 909 Address(DestElementPHI, 910 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 911 912 // Emit copy. 913 { 914 CodeGenFunction::RunCleanupsScope InitScope(CGF); 915 if (EmitDeclareReductionInit) { 916 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 917 SrcElementCurrent, ElementTy); 918 } else 919 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 920 /*IsInitializer=*/false); 921 } 922 923 if (DRD) { 924 // Shift the address forward by one element. 925 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 926 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 927 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 928 } 929 930 // Shift the address forward by one element. 931 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 932 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 933 // Check whether we've reached the end. 934 llvm::Value *Done = 935 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 936 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 937 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 938 939 // Done. 
940 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 941 } 942 943 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 944 return CGF.EmitOMPSharedLValue(E); 945 } 946 947 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 948 const Expr *E) { 949 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 950 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 951 return LValue(); 952 } 953 954 void ReductionCodeGen::emitAggregateInitialization( 955 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 956 const OMPDeclareReductionDecl *DRD) { 957 // Emit VarDecl with copy init for arrays. 958 // Get the address of the original variable captured in current 959 // captured region. 960 const auto *PrivateVD = 961 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 962 bool EmitDeclareReductionInit = 963 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 964 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 965 EmitDeclareReductionInit, 966 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 967 : PrivateVD->getInit(), 968 DRD, SharedLVal.getAddress()); 969 } 970 971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 972 ArrayRef<const Expr *> Privates, 973 ArrayRef<const Expr *> ReductionOps) { 974 ClausesData.reserve(Shareds.size()); 975 SharedAddresses.reserve(Shareds.size()); 976 Sizes.reserve(Shareds.size()); 977 BaseDecls.reserve(Shareds.size()); 978 auto IPriv = Privates.begin(); 979 auto IRed = ReductionOps.begin(); 980 for (const Expr *Ref : Shareds) { 981 ClausesData.emplace_back(Ref, *IPriv, *IRed); 982 std::advance(IPriv, 1); 983 std::advance(IRed, 1); 984 } 985 } 986 987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 988 assert(SharedAddresses.size() == N && 989 "Number of generated lvalues must be exactly N."); 990 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 991 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 992 SharedAddresses.emplace_back(First, Second); 993 } 994 995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 996 const auto *PrivateVD = 997 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 998 QualType PrivateType = PrivateVD->getType(); 999 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1000 if (!PrivateType->isVariablyModifiedType()) { 1001 Sizes.emplace_back( 1002 CGF.getTypeSize( 1003 SharedAddresses[N].first.getType().getNonReferenceType()), 1004 nullptr); 1005 return; 1006 } 1007 llvm::Value *Size; 1008 llvm::Value *SizeInChars; 1009 auto *ElemType = 1010 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 1011 ->getElementType(); 1012 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1013 if (AsArraySection) { 1014 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 1015 SharedAddresses[N].first.getPointer()); 1016 Size = CGF.Builder.CreateNUWAdd( 1017 Size, llvm::ConstantInt::get(Size->getType(), 
/*V=*/1)); 1018 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1019 } else { 1020 SizeInChars = CGF.getTypeSize( 1021 SharedAddresses[N].first.getType().getNonReferenceType()); 1022 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1023 } 1024 Sizes.emplace_back(SizeInChars, Size); 1025 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1026 CGF, 1027 cast<OpaqueValueExpr>( 1028 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1029 RValue::get(Size)); 1030 CGF.EmitVariablyModifiedType(PrivateType); 1031 } 1032 1033 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1034 llvm::Value *Size) { 1035 const auto *PrivateVD = 1036 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1037 QualType PrivateType = PrivateVD->getType(); 1038 if (!PrivateType->isVariablyModifiedType()) { 1039 assert(!Size && !Sizes[N].second && 1040 "Size should be nullptr for non-variably modified reduction " 1041 "items."); 1042 return; 1043 } 1044 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1045 CGF, 1046 cast<OpaqueValueExpr>( 1047 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1048 RValue::get(Size)); 1049 CGF.EmitVariablyModifiedType(PrivateType); 1050 } 1051 1052 void ReductionCodeGen::emitInitialization( 1053 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1054 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1055 assert(SharedAddresses.size() > N && "No variable was generated"); 1056 const auto *PrivateVD = 1057 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1058 const OMPDeclareReductionDecl *DRD = 1059 getReductionInit(ClausesData[N].ReductionOp); 1060 QualType PrivateType = PrivateVD->getType(); 1061 PrivateAddr = CGF.Builder.CreateElementBitCast( 1062 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1063 QualType SharedType = SharedAddresses[N].first.getType(); 1064 SharedLVal = CGF.MakeAddrLValue( 1065 
CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1066 CGF.ConvertTypeForMem(SharedType)), 1067 SharedType, SharedAddresses[N].first.getBaseInfo(), 1068 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1069 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1070 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1071 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1072 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1073 PrivateAddr, SharedLVal.getAddress(), 1074 SharedLVal.getType()); 1075 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1076 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1077 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1078 PrivateVD->getType().getQualifiers(), 1079 /*IsInitializer=*/false); 1080 } 1081 } 1082 1083 bool ReductionCodeGen::needCleanups(unsigned N) { 1084 const auto *PrivateVD = 1085 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1086 QualType PrivateType = PrivateVD->getType(); 1087 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1088 return DTorKind != QualType::DK_none; 1089 } 1090 1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1092 Address PrivateAddr) { 1093 const auto *PrivateVD = 1094 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1095 QualType PrivateType = PrivateVD->getType(); 1096 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1097 if (needCleanups(N)) { 1098 PrivateAddr = CGF.Builder.CreateElementBitCast( 1099 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1100 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1101 } 1102 } 1103 1104 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1105 LValue BaseLV) { 1106 BaseTy = BaseTy.getNonReferenceType(); 1107 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1108 
!CGF.getContext().hasSameType(BaseTy, ElTy)) { 1109 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1110 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1111 } else { 1112 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1113 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1114 } 1115 BaseTy = BaseTy->getPointeeType(); 1116 } 1117 return CGF.MakeAddrLValue( 1118 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1119 CGF.ConvertTypeForMem(ElTy)), 1120 BaseLV.getType(), BaseLV.getBaseInfo(), 1121 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1122 } 1123 1124 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1125 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1126 llvm::Value *Addr) { 1127 Address Tmp = Address::invalid(); 1128 Address TopTmp = Address::invalid(); 1129 Address MostTopTmp = Address::invalid(); 1130 BaseTy = BaseTy.getNonReferenceType(); 1131 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1132 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1133 Tmp = CGF.CreateMemTemp(BaseTy); 1134 if (TopTmp.isValid()) 1135 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1136 else 1137 MostTopTmp = Tmp; 1138 TopTmp = Tmp; 1139 BaseTy = BaseTy->getPointeeType(); 1140 } 1141 llvm::Type *Ty = BaseLVType; 1142 if (Tmp.isValid()) 1143 Ty = Tmp.getElementType(); 1144 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1145 if (Tmp.isValid()) { 1146 CGF.Builder.CreateStore(Addr, Tmp); 1147 return MostTopTmp; 1148 } 1149 return Address(Addr, BaseLVAlignment); 1150 } 1151 1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1153 const VarDecl *OrigVD = nullptr; 1154 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1155 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1156 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1157 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1158 
while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1159 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1160 DE = cast<DeclRefExpr>(Base); 1161 OrigVD = cast<VarDecl>(DE->getDecl()); 1162 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1163 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1164 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1165 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1166 DE = cast<DeclRefExpr>(Base); 1167 OrigVD = cast<VarDecl>(DE->getDecl()); 1168 } 1169 return OrigVD; 1170 } 1171 1172 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1173 Address PrivateAddr) { 1174 const DeclRefExpr *DE; 1175 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1176 BaseDecls.emplace_back(OrigVD); 1177 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1178 LValue BaseLValue = 1179 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1180 OriginalBaseLValue); 1181 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1182 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1183 llvm::Value *PrivatePointer = 1184 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1185 PrivateAddr.getPointer(), 1186 SharedAddresses[N].first.getAddress().getType()); 1187 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1188 return castToBase(CGF, OrigVD->getType(), 1189 SharedAddresses[N].first.getType(), 1190 OriginalBaseLValue.getAddress().getType(), 1191 OriginalBaseLValue.getAlignment(), Ptr); 1192 } 1193 BaseDecls.emplace_back( 1194 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1195 return PrivateAddr; 1196 } 1197 1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1199 const OMPDeclareReductionDecl *DRD = 1200 getReductionInit(ClausesData[N].ReductionOp); 1201 return DRD && DRD->getInitializer(); 1202 } 1203 1204 LValue 
CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1205 return CGF.EmitLoadOfPointerLValue( 1206 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1207 getThreadIDVariable()->getType()->castAs<PointerType>()); 1208 } 1209 1210 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1211 if (!CGF.HaveInsertPoint()) 1212 return; 1213 // 1.2.2 OpenMP Language Terminology 1214 // Structured block - An executable statement with a single entry at the 1215 // top and a single exit at the bottom. 1216 // The point of exit cannot be a branch out of the structured block. 1217 // longjmp() and throw() must not violate the entry/exit criteria. 1218 CGF.EHStack.pushTerminate(); 1219 CodeGen(CGF); 1220 CGF.EHStack.popTerminate(); 1221 } 1222 1223 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1224 CodeGenFunction &CGF) { 1225 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1226 getThreadIDVariable()->getType(), 1227 AlignmentSource::Decl); 1228 } 1229 1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1231 QualType FieldTy) { 1232 auto *Field = FieldDecl::Create( 1233 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1234 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1235 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1236 Field->setAccess(AS_public); 1237 DC->addDecl(Field); 1238 return Field; 1239 } 1240 1241 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1242 StringRef Separator) 1243 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1244 OffloadEntriesInfoManager(CGM) { 1245 ASTContext &C = CGM.getContext(); 1246 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1247 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1248 RD->startDefinition(); 1249 // reserved_1 1250 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1251 // flags 1252 addFieldToRecordDecl(C, RD, 
KmpInt32Ty); 1253 // reserved_2 1254 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1255 // reserved_3 1256 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1257 // psource 1258 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1259 RD->completeDefinition(); 1260 IdentQTy = C.getRecordType(RD); 1261 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1262 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1263 1264 loadOffloadInfoMetadata(); 1265 } 1266 1267 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, 1268 const GlobalDecl &OldGD, 1269 llvm::GlobalValue *OrigAddr, 1270 bool IsForDefinition) { 1271 // Emit at least a definition for the aliasee if the the address of the 1272 // original function is requested. 1273 if (IsForDefinition || OrigAddr) 1274 (void)CGM.GetAddrOfGlobal(NewGD); 1275 StringRef NewMangledName = CGM.getMangledName(NewGD); 1276 llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); 1277 if (Addr && !Addr->isDeclaration()) { 1278 const auto *D = cast<FunctionDecl>(OldGD.getDecl()); 1279 const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD); 1280 llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); 1281 1282 // Create a reference to the named value. This ensures that it is emitted 1283 // if a deferred decl. 1284 llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); 1285 1286 // Create the new alias itself, but don't set a name yet. 
1287 auto *GA = 1288 llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); 1289 1290 if (OrigAddr) { 1291 assert(OrigAddr->isDeclaration() && "Expected declaration"); 1292 1293 GA->takeName(OrigAddr); 1294 OrigAddr->replaceAllUsesWith( 1295 llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); 1296 OrigAddr->eraseFromParent(); 1297 } else { 1298 GA->setName(CGM.getMangledName(OldGD)); 1299 } 1300 1301 // Set attributes which are particular to an alias; this is a 1302 // specialization of the attributes which may be set on a global function. 1303 if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || 1304 D->isWeakImported()) 1305 GA->setLinkage(llvm::Function::WeakAnyLinkage); 1306 1307 CGM.SetCommonAttributes(OldGD, GA); 1308 return true; 1309 } 1310 return false; 1311 } 1312 1313 void CGOpenMPRuntime::clear() { 1314 InternalVars.clear(); 1315 // Clean non-target variable declarations possibly used only in debug info. 1316 for (const auto &Data : EmittedNonTargetVariables) { 1317 if (!Data.getValue().pointsToAliveValue()) 1318 continue; 1319 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1320 if (!GV) 1321 continue; 1322 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1323 continue; 1324 GV->eraseFromParent(); 1325 } 1326 // Emit aliases for the deferred aliasees. 1327 for (const auto &Pair : DeferredVariantFunction) { 1328 StringRef MangledName = CGM.getMangledName(Pair.second.second); 1329 llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); 1330 // If not able to emit alias, just emit original declaration. 
1331 (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, 1332 /*IsForDefinition=*/false); 1333 } 1334 } 1335 1336 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1337 SmallString<128> Buffer; 1338 llvm::raw_svector_ostream OS(Buffer); 1339 StringRef Sep = FirstSeparator; 1340 for (StringRef Part : Parts) { 1341 OS << Sep << Part; 1342 Sep = Separator; 1343 } 1344 return OS.str(); 1345 } 1346 1347 static llvm::Function * 1348 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1349 const Expr *CombinerInitializer, const VarDecl *In, 1350 const VarDecl *Out, bool IsCombiner) { 1351 // void .omp_combiner.(Ty *in, Ty *out); 1352 ASTContext &C = CGM.getContext(); 1353 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1354 FunctionArgList Args; 1355 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1356 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1357 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1358 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1359 Args.push_back(&OmpOutParm); 1360 Args.push_back(&OmpInParm); 1361 const CGFunctionInfo &FnInfo = 1362 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1363 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1364 std::string Name = CGM.getOpenMPRuntime().getName( 1365 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1366 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1367 Name, &CGM.getModule()); 1368 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1369 if (CGM.getLangOpts().Optimize) { 1370 Fn->removeFnAttr(llvm::Attribute::NoInline); 1371 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1372 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1373 } 1374 CodeGenFunction CGF(CGM); 1375 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1376 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
1377 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1378 Out->getLocation()); 1379 CodeGenFunction::OMPPrivateScope Scope(CGF); 1380 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1381 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1382 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1383 .getAddress(); 1384 }); 1385 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1386 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1387 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1388 .getAddress(); 1389 }); 1390 (void)Scope.Privatize(); 1391 if (!IsCombiner && Out->hasInit() && 1392 !CGF.isTrivialInitializer(Out->getInit())) { 1393 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1394 Out->getType().getQualifiers(), 1395 /*IsInitializer=*/true); 1396 } 1397 if (CombinerInitializer) 1398 CGF.EmitIgnoredExpr(CombinerInitializer); 1399 Scope.ForceCleanup(); 1400 CGF.FinishFunction(); 1401 return Fn; 1402 } 1403 1404 void CGOpenMPRuntime::emitUserDefinedReduction( 1405 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1406 if (UDRMap.count(D) > 0) 1407 return; 1408 llvm::Function *Combiner = emitCombinerOrInitializer( 1409 CGM, D->getType(), D->getCombiner(), 1410 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1411 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1412 /*IsCombiner=*/true); 1413 llvm::Function *Initializer = nullptr; 1414 if (const Expr *Init = D->getInitializer()) { 1415 Initializer = emitCombinerOrInitializer( 1416 CGM, D->getType(), 1417 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? 
Init 1418 : nullptr, 1419 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1420 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1421 /*IsCombiner=*/false); 1422 } 1423 UDRMap.try_emplace(D, Combiner, Initializer); 1424 if (CGF) { 1425 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1426 Decls.second.push_back(D); 1427 } 1428 } 1429 1430 std::pair<llvm::Function *, llvm::Function *> 1431 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1432 auto I = UDRMap.find(D); 1433 if (I != UDRMap.end()) 1434 return I->second; 1435 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1436 return UDRMap.lookup(D); 1437 } 1438 1439 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1440 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1441 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1442 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1443 assert(ThreadIDVar->getType()->isPointerType() && 1444 "thread id variable must be of type kmp_int32 *"); 1445 CodeGenFunction CGF(CGM, true); 1446 bool HasCancel = false; 1447 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1448 HasCancel = OPD->hasCancel(); 1449 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1450 HasCancel = OPSD->hasCancel(); 1451 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1452 HasCancel = OPFD->hasCancel(); 1453 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1454 HasCancel = OPFD->hasCancel(); 1455 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1456 HasCancel = OPFD->hasCancel(); 1457 else if (const auto *OPFD = 1458 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1459 HasCancel = OPFD->hasCancel(); 1460 else if (const auto *OPFD = 1461 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1462 HasCancel = OPFD->hasCancel(); 1463 CGOpenMPOutlinedRegionInfo 
CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1464 HasCancel, OutlinedHelperName); 1465 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1466 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1467 } 1468 1469 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1470 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1471 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1472 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1473 return emitParallelOrTeamsOutlinedFunction( 1474 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1475 } 1476 1477 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1478 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1479 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1480 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1481 return emitParallelOrTeamsOutlinedFunction( 1482 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1483 } 1484 1485 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1486 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1487 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1488 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1489 bool Tied, unsigned &NumberOfParts) { 1490 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1491 PrePostActionTy &) { 1492 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1493 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1494 llvm::Value *TaskArgs[] = { 1495 UpLoc, ThreadID, 1496 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1497 TaskTVar->getType()->castAs<PointerType>()) 1498 .getPointer()}; 1499 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1500 }; 1501 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1502 UntiedCodeGen); 1503 CodeGen.setAction(Action); 1504 
assert(!ThreadIDVar->getType()->isPointerType() && 1505 "thread id variable must be of type kmp_int32 for tasks"); 1506 const OpenMPDirectiveKind Region = 1507 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1508 : OMPD_task; 1509 const CapturedStmt *CS = D.getCapturedStmt(Region); 1510 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1511 CodeGenFunction CGF(CGM, true); 1512 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1513 InnermostKind, 1514 TD ? TD->hasCancel() : false, Action); 1515 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1516 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1517 if (!Tied) 1518 NumberOfParts = Action.getNumberOfParts(); 1519 return Res; 1520 } 1521 1522 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1523 const RecordDecl *RD, const CGRecordLayout &RL, 1524 ArrayRef<llvm::Constant *> Data) { 1525 llvm::StructType *StructTy = RL.getLLVMType(); 1526 unsigned PrevIdx = 0; 1527 ConstantInitBuilder CIBuilder(CGM); 1528 auto DI = Data.begin(); 1529 for (const FieldDecl *FD : RD->fields()) { 1530 unsigned Idx = RL.getLLVMFieldNo(FD); 1531 // Fill the alignment. 1532 for (unsigned I = PrevIdx; I < Idx; ++I) 1533 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1534 PrevIdx = Idx + 1; 1535 Fields.add(*DI); 1536 ++DI; 1537 } 1538 } 1539 1540 template <class... As> 1541 static llvm::GlobalVariable * 1542 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1543 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1544 As &&... 
Args) { 1545 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1546 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1547 ConstantInitBuilder CIBuilder(CGM); 1548 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1549 buildStructValue(Fields, CGM, RD, RL, Data); 1550 return Fields.finishAndCreateGlobal( 1551 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1552 std::forward<As>(Args)...); 1553 } 1554 1555 template <typename T> 1556 static void 1557 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1558 ArrayRef<llvm::Constant *> Data, 1559 T &Parent) { 1560 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1561 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1562 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1563 buildStructValue(Fields, CGM, RD, RL, Data); 1564 Fields.finishAndAddTo(Parent); 1565 } 1566 1567 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1568 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1569 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1570 FlagsTy FlagsKey(Flags, Reserved2Flags); 1571 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1572 if (!Entry) { 1573 if (!DefaultOpenMPPSource) { 1574 // Initialize default location for psource field of ident_t structure of 1575 // all ident_t objects. Format is ";file;function;line;column;;". 
1576 // Taken from 1577 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1578 DefaultOpenMPPSource = 1579 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1580 DefaultOpenMPPSource = 1581 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1582 } 1583 1584 llvm::Constant *Data[] = { 1585 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1586 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1587 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1588 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1589 llvm::GlobalValue *DefaultOpenMPLocation = 1590 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1591 llvm::GlobalValue::PrivateLinkage); 1592 DefaultOpenMPLocation->setUnnamedAddr( 1593 llvm::GlobalValue::UnnamedAddr::Global); 1594 1595 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1596 } 1597 return Address(Entry, Align); 1598 } 1599 1600 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1601 bool AtCurrentPoint) { 1602 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1603 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1604 1605 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1606 if (AtCurrentPoint) { 1607 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1608 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1609 } else { 1610 Elem.second.ServiceInsertPt = 1611 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1612 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1613 } 1614 } 1615 1616 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1617 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1618 if (Elem.second.ServiceInsertPt) { 1619 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1620 Elem.second.ServiceInsertPt = nullptr; 1621 Ptr->eraseFromParent(); 1622 } 1623 } 1624 1625 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1626 SourceLocation Loc, 1627 unsigned Flags) { 1628 Flags |= OMP_IDENT_KMPC; 1629 // If no debug info is generated - return global default location. 1630 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1631 Loc.isInvalid()) 1632 return getOrCreateDefaultLocation(Flags).getPointer(); 1633 1634 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1635 1636 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1637 Address LocValue = Address::invalid(); 1638 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1639 if (I != OpenMPLocThreadIDMap.end()) 1640 LocValue = Address(I->second.DebugLoc, Align); 1641 1642 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1643 // GetOpenMPThreadID was called before this routine. 1644 if (!LocValue.isValid()) { 1645 // Generate "ident_t .kmpc_loc.addr;" 1646 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1647 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1648 Elem.second.DebugLoc = AI.getPointer(); 1649 LocValue = AI; 1650 1651 if (!Elem.second.ServiceInsertPt) 1652 setLocThreadIdInsertPt(CGF); 1653 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1654 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1655 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1656 CGF.getTypeSize(IdentQTy)); 1657 } 1658 1659 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1660 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1661 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1662 LValue PSource = 1663 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1664 1665 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1666 if (OMPDebugLoc == nullptr) { 1667 SmallString<128> Buffer2; 1668 llvm::raw_svector_ostream OS2(Buffer2); 1669 // Build debug location 1670 PresumedLoc PLoc = 
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1671 OS2 << ";" << PLoc.getFilename() << ";"; 1672 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1673 OS2 << FD->getQualifiedNameAsString(); 1674 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1675 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1676 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1677 } 1678 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1679 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1680 1681 // Our callers always pass this to a runtime function, so for 1682 // convenience, go ahead and return a naked pointer. 1683 return LocValue.getPointer(); 1684 } 1685 1686 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1687 SourceLocation Loc) { 1688 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1689 1690 llvm::Value *ThreadID = nullptr; 1691 // Check whether we've already cached a load of the thread id in this 1692 // function. 1693 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1694 if (I != OpenMPLocThreadIDMap.end()) { 1695 ThreadID = I->second.ThreadID; 1696 if (ThreadID != nullptr) 1697 return ThreadID; 1698 } 1699 // If exceptions are enabled, do not use parameter to avoid possible crash. 1700 if (auto *OMPRegionInfo = 1701 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1702 if (OMPRegionInfo->getThreadIDVariable()) { 1703 // Check if this an outlined function with thread id passed as argument. 
1704 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1705 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1706 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1707 !CGF.getLangOpts().CXXExceptions || 1708 CGF.Builder.GetInsertBlock() == TopBlock || 1709 !isa<llvm::Instruction>(LVal.getPointer()) || 1710 cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock || 1711 cast<llvm::Instruction>(LVal.getPointer())->getParent() == 1712 CGF.Builder.GetInsertBlock()) { 1713 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1714 // If value loaded in entry block, cache it and use it everywhere in 1715 // function. 1716 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1717 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1718 Elem.second.ThreadID = ThreadID; 1719 } 1720 return ThreadID; 1721 } 1722 } 1723 } 1724 1725 // This is not an outlined function region - need to call __kmpc_int32 1726 // kmpc_global_thread_num(ident_t *loc). 1727 // Generate thread id value and cache this value for use across the 1728 // function. 
1729 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1730 if (!Elem.second.ServiceInsertPt) 1731 setLocThreadIdInsertPt(CGF); 1732 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1733 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1734 llvm::CallInst *Call = CGF.Builder.CreateCall( 1735 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1736 emitUpdateLocation(CGF, Loc)); 1737 Call->setCallingConv(CGF.getRuntimeCC()); 1738 Elem.second.ThreadID = Call; 1739 return Call; 1740 } 1741 1742 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1743 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1744 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1745 clearLocThreadIdInsertPt(CGF); 1746 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1747 } 1748 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1749 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1750 UDRMap.erase(D); 1751 FunctionUDRMap.erase(CGF.CurFn); 1752 } 1753 auto I = FunctionUDMMap.find(CGF.CurFn); 1754 if (I != FunctionUDMMap.end()) { 1755 for(auto *D : I->second) 1756 UDMMap.erase(D); 1757 FunctionUDMMap.erase(I); 1758 } 1759 } 1760 1761 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1762 return IdentTy->getPointerTo(); 1763 } 1764 1765 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1766 if (!Kmpc_MicroTy) { 1767 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1768 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1769 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1770 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1771 } 1772 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1773 } 1774 1775 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1776 llvm::FunctionCallee RTLFn = nullptr; 1777 switch (static_cast<OpenMPRTLFunction>(Function)) { 1778 case OMPRTL__kmpc_fork_call: { 1779 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1780 // microtask, ...); 1781 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1782 getKmpc_MicroPointerTy()}; 1783 auto *FnTy = 1784 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1785 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1786 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1787 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1788 llvm::LLVMContext &Ctx = F->getContext(); 1789 llvm::MDBuilder MDB(Ctx); 1790 // Annotate the callback behavior of the __kmpc_fork_call: 1791 // - The callback callee is argument number 2 (microtask). 1792 // - The first two arguments of the callback callee are unknown (-1). 1793 // - All variadic arguments to the __kmpc_fork_call are passed to the 1794 // callback callee. 
1795 F->addMetadata( 1796 llvm::LLVMContext::MD_callback, 1797 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1798 2, {-1, -1}, 1799 /* VarArgsArePassed */ true)})); 1800 } 1801 } 1802 break; 1803 } 1804 case OMPRTL__kmpc_global_thread_num: { 1805 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1806 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1807 auto *FnTy = 1808 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1809 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1810 break; 1811 } 1812 case OMPRTL__kmpc_threadprivate_cached: { 1813 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1814 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1815 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1816 CGM.VoidPtrTy, CGM.SizeTy, 1817 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1818 auto *FnTy = 1819 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1820 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1821 break; 1822 } 1823 case OMPRTL__kmpc_critical: { 1824 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1825 // kmp_critical_name *crit); 1826 llvm::Type *TypeParams[] = { 1827 getIdentTyPointerTy(), CGM.Int32Ty, 1828 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1829 auto *FnTy = 1830 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1831 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1832 break; 1833 } 1834 case OMPRTL__kmpc_critical_with_hint: { 1835 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1836 // kmp_critical_name *crit, uintptr_t hint); 1837 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1838 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1839 CGM.IntPtrTy}; 1840 auto *FnTy = 1841 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1842 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1843 break; 1844 } 1845 case OMPRTL__kmpc_threadprivate_register: { 1846 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1847 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1848 // typedef void *(*kmpc_ctor)(void *); 1849 auto *KmpcCtorTy = 1850 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1851 /*isVarArg*/ false)->getPointerTo(); 1852 // typedef void *(*kmpc_cctor)(void *, void *); 1853 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1854 auto *KmpcCopyCtorTy = 1855 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1856 /*isVarArg*/ false) 1857 ->getPointerTo(); 1858 // typedef void (*kmpc_dtor)(void *); 1859 auto *KmpcDtorTy = 1860 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1861 ->getPointerTo(); 1862 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1863 KmpcCopyCtorTy, KmpcDtorTy}; 1864 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1865 /*isVarArg*/ false); 1866 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1867 break; 1868 } 1869 case OMPRTL__kmpc_end_critical: { 1870 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1871 // kmp_critical_name *crit); 1872 llvm::Type *TypeParams[] = { 1873 getIdentTyPointerTy(), CGM.Int32Ty, 1874 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1875 auto *FnTy = 1876 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1877 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1878 break; 1879 } 1880 case OMPRTL__kmpc_cancel_barrier: { 1881 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1882 // global_tid); 1883 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1884 auto *FnTy = 1885 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1886 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1887 break; 1888 } 1889 case 
OMPRTL__kmpc_barrier: { 1890 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1891 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1892 auto *FnTy = 1893 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1894 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1895 break; 1896 } 1897 case OMPRTL__kmpc_for_static_fini: { 1898 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1899 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1900 auto *FnTy = 1901 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1902 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1903 break; 1904 } 1905 case OMPRTL__kmpc_push_num_threads: { 1906 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1907 // kmp_int32 num_threads) 1908 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1909 CGM.Int32Ty}; 1910 auto *FnTy = 1911 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1912 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_serialized_parallel: { 1916 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1917 // global_tid); 1918 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1919 auto *FnTy = 1920 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1921 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1922 break; 1923 } 1924 case OMPRTL__kmpc_end_serialized_parallel: { 1925 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1926 // global_tid); 1927 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1928 auto *FnTy = 1929 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1930 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1931 break; 1932 } 1933 case OMPRTL__kmpc_flush: { 1934 // Build void 
__kmpc_flush(ident_t *loc); 1935 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1936 auto *FnTy = 1937 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1938 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1939 break; 1940 } 1941 case OMPRTL__kmpc_master: { 1942 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1943 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1944 auto *FnTy = 1945 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1946 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1947 break; 1948 } 1949 case OMPRTL__kmpc_end_master: { 1950 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1951 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1952 auto *FnTy = 1953 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1954 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1955 break; 1956 } 1957 case OMPRTL__kmpc_omp_taskyield: { 1958 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1959 // int end_part); 1960 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1961 auto *FnTy = 1962 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1963 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1964 break; 1965 } 1966 case OMPRTL__kmpc_single: { 1967 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1968 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1969 auto *FnTy = 1970 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1971 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1972 break; 1973 } 1974 case OMPRTL__kmpc_end_single: { 1975 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1976 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1977 auto *FnTy = 1978 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1979 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1980 break; 1981 } 1982 case OMPRTL__kmpc_omp_task_alloc: { 1983 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1984 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1985 // kmp_routine_entry_t *task_entry); 1986 assert(KmpRoutineEntryPtrTy != nullptr && 1987 "Type kmp_routine_entry_t must be created."); 1988 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1989 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1990 // Return void * and then cast to particular kmp_task_t type. 1991 auto *FnTy = 1992 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1993 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1994 break; 1995 } 1996 case OMPRTL__kmpc_omp_target_task_alloc: { 1997 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 1998 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1999 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2000 assert(KmpRoutineEntryPtrTy != nullptr && 2001 "Type kmp_routine_entry_t must be created."); 2002 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2003 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2004 CGM.Int64Ty}; 2005 // Return void * and then cast to particular kmp_task_t type. 
2006 auto *FnTy = 2007 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2008 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2009 break; 2010 } 2011 case OMPRTL__kmpc_omp_task: { 2012 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2013 // *new_task); 2014 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2015 CGM.VoidPtrTy}; 2016 auto *FnTy = 2017 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2018 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2019 break; 2020 } 2021 case OMPRTL__kmpc_copyprivate: { 2022 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2023 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2024 // kmp_int32 didit); 2025 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2026 auto *CpyFnTy = 2027 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2028 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2029 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2030 CGM.Int32Ty}; 2031 auto *FnTy = 2032 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2033 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2034 break; 2035 } 2036 case OMPRTL__kmpc_reduce: { 2037 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2038 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2039 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2040 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2041 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2042 /*isVarArg=*/false); 2043 llvm::Type *TypeParams[] = { 2044 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2045 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2046 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2047 auto *FnTy = 2048 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2049 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2050 break; 2051 } 2052 case OMPRTL__kmpc_reduce_nowait: { 2053 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2054 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2055 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2056 // *lck); 2057 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2058 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2059 /*isVarArg=*/false); 2060 llvm::Type *TypeParams[] = { 2061 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2062 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2063 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2064 auto *FnTy = 2065 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2066 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2067 break; 2068 } 2069 case OMPRTL__kmpc_end_reduce: { 2070 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2071 // kmp_critical_name *lck); 2072 llvm::Type *TypeParams[] = { 2073 getIdentTyPointerTy(), CGM.Int32Ty, 2074 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2075 auto *FnTy = 2076 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2077 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2078 break; 2079 } 2080 case OMPRTL__kmpc_end_reduce_nowait: { 2081 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2082 // kmp_critical_name *lck); 2083 llvm::Type *TypeParams[] = { 2084 getIdentTyPointerTy(), CGM.Int32Ty, 2085 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2086 auto *FnTy = 2087 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2088 RTLFn = 2089 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2090 break; 2091 } 2092 case OMPRTL__kmpc_omp_task_begin_if0: { 
2093 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2094 // *new_task); 2095 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2096 CGM.VoidPtrTy}; 2097 auto *FnTy = 2098 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2099 RTLFn = 2100 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2101 break; 2102 } 2103 case OMPRTL__kmpc_omp_task_complete_if0: { 2104 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2105 // *new_task); 2106 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2107 CGM.VoidPtrTy}; 2108 auto *FnTy = 2109 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2110 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2111 /*Name=*/"__kmpc_omp_task_complete_if0"); 2112 break; 2113 } 2114 case OMPRTL__kmpc_ordered: { 2115 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2116 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2117 auto *FnTy = 2118 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2119 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2120 break; 2121 } 2122 case OMPRTL__kmpc_end_ordered: { 2123 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2124 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2125 auto *FnTy = 2126 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2127 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2128 break; 2129 } 2130 case OMPRTL__kmpc_omp_taskwait: { 2131 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2132 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2133 auto *FnTy = 2134 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2135 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2136 break; 2137 } 2138 case OMPRTL__kmpc_taskgroup: { 2139 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2140 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2141 auto *FnTy = 2142 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2143 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2144 break; 2145 } 2146 case OMPRTL__kmpc_end_taskgroup: { 2147 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2148 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2149 auto *FnTy = 2150 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2151 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2152 break; 2153 } 2154 case OMPRTL__kmpc_push_proc_bind: { 2155 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2156 // int proc_bind) 2157 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2158 auto *FnTy = 2159 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2160 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2161 break; 2162 } 2163 case OMPRTL__kmpc_omp_task_with_deps: { 2164 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2165 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2166 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2167 llvm::Type *TypeParams[] = { 2168 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2169 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2170 auto *FnTy = 2171 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2172 RTLFn = 2173 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2174 break; 2175 } 2176 case OMPRTL__kmpc_omp_wait_deps: { 2177 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2178 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2179 // kmp_depend_info_t *noalias_dep_list); 2180 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2181 CGM.Int32Ty, CGM.VoidPtrTy, 2182 
CGM.Int32Ty, CGM.VoidPtrTy}; 2183 auto *FnTy = 2184 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2185 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2186 break; 2187 } 2188 case OMPRTL__kmpc_cancellationpoint: { 2189 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2190 // global_tid, kmp_int32 cncl_kind) 2191 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2192 auto *FnTy = 2193 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2194 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2195 break; 2196 } 2197 case OMPRTL__kmpc_cancel: { 2198 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2199 // kmp_int32 cncl_kind) 2200 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2201 auto *FnTy = 2202 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2203 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2204 break; 2205 } 2206 case OMPRTL__kmpc_push_num_teams: { 2207 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2208 // kmp_int32 num_teams, kmp_int32 num_threads) 2209 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2210 CGM.Int32Ty}; 2211 auto *FnTy = 2212 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2213 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2214 break; 2215 } 2216 case OMPRTL__kmpc_fork_teams: { 2217 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2218 // microtask, ...); 2219 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2220 getKmpc_MicroPointerTy()}; 2221 auto *FnTy = 2222 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2223 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2224 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2225 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2226 llvm::LLVMContext &Ctx = F->getContext(); 2227 llvm::MDBuilder MDB(Ctx); 2228 // Annotate the callback behavior of the __kmpc_fork_teams: 2229 // - The callback callee is argument number 2 (microtask). 2230 // - The first two arguments of the callback callee are unknown (-1). 2231 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2232 // callback callee. 2233 F->addMetadata( 2234 llvm::LLVMContext::MD_callback, 2235 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2236 2, {-1, -1}, 2237 /* VarArgsArePassed */ true)})); 2238 } 2239 } 2240 break; 2241 } 2242 case OMPRTL__kmpc_taskloop: { 2243 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2244 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2245 // sched, kmp_uint64 grainsize, void *task_dup); 2246 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2247 CGM.IntTy, 2248 CGM.VoidPtrTy, 2249 CGM.IntTy, 2250 CGM.Int64Ty->getPointerTo(), 2251 CGM.Int64Ty->getPointerTo(), 2252 CGM.Int64Ty, 2253 CGM.IntTy, 2254 CGM.IntTy, 2255 CGM.Int64Ty, 2256 CGM.VoidPtrTy}; 2257 auto *FnTy = 2258 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2259 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2260 break; 2261 } 2262 case OMPRTL__kmpc_doacross_init: { 2263 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2264 // num_dims, struct kmp_dim *dims); 2265 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2266 CGM.Int32Ty, 2267 CGM.Int32Ty, 2268 CGM.VoidPtrTy}; 2269 auto *FnTy = 2270 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2271 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2272 break; 2273 } 2274 case OMPRTL__kmpc_doacross_fini: { 2275 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2276 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2277 auto *FnTy 
= 2278 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2279 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2280 break; 2281 } 2282 case OMPRTL__kmpc_doacross_post: { 2283 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2284 // *vec); 2285 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2286 CGM.Int64Ty->getPointerTo()}; 2287 auto *FnTy = 2288 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2289 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2290 break; 2291 } 2292 case OMPRTL__kmpc_doacross_wait: { 2293 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2294 // *vec); 2295 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2296 CGM.Int64Ty->getPointerTo()}; 2297 auto *FnTy = 2298 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2299 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2300 break; 2301 } 2302 case OMPRTL__kmpc_task_reduction_init: { 2303 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2304 // *data); 2305 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2306 auto *FnTy = 2307 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2308 RTLFn = 2309 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2310 break; 2311 } 2312 case OMPRTL__kmpc_task_reduction_get_th_data: { 2313 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2314 // *d); 2315 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2316 auto *FnTy = 2317 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2318 RTLFn = CGM.CreateRuntimeFunction( 2319 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2320 break; 2321 } 2322 case OMPRTL__kmpc_alloc: { 2323 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2324 // 
al); omp_allocator_handle_t type is void *. 2325 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2326 auto *FnTy = 2327 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2328 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2329 break; 2330 } 2331 case OMPRTL__kmpc_free: { 2332 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2333 // al); omp_allocator_handle_t type is void *. 2334 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2335 auto *FnTy = 2336 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2337 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2338 break; 2339 } 2340 case OMPRTL__kmpc_push_target_tripcount: { 2341 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2342 // size); 2343 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2344 llvm::FunctionType *FnTy = 2345 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2346 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2347 break; 2348 } 2349 case OMPRTL__tgt_target: { 2350 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2351 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2352 // *arg_types); 2353 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2354 CGM.VoidPtrTy, 2355 CGM.Int32Ty, 2356 CGM.VoidPtrPtrTy, 2357 CGM.VoidPtrPtrTy, 2358 CGM.Int64Ty->getPointerTo(), 2359 CGM.Int64Ty->getPointerTo()}; 2360 auto *FnTy = 2361 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2362 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2363 break; 2364 } 2365 case OMPRTL__tgt_target_nowait: { 2366 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2367 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2368 // int64_t *arg_types); 2369 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2370 CGM.VoidPtrTy, 2371 
CGM.Int32Ty, 2372 CGM.VoidPtrPtrTy, 2373 CGM.VoidPtrPtrTy, 2374 CGM.Int64Ty->getPointerTo(), 2375 CGM.Int64Ty->getPointerTo()}; 2376 auto *FnTy = 2377 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2378 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2379 break; 2380 } 2381 case OMPRTL__tgt_target_teams: { 2382 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2383 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2384 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2385 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2386 CGM.VoidPtrTy, 2387 CGM.Int32Ty, 2388 CGM.VoidPtrPtrTy, 2389 CGM.VoidPtrPtrTy, 2390 CGM.Int64Ty->getPointerTo(), 2391 CGM.Int64Ty->getPointerTo(), 2392 CGM.Int32Ty, 2393 CGM.Int32Ty}; 2394 auto *FnTy = 2395 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2396 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2397 break; 2398 } 2399 case OMPRTL__tgt_target_teams_nowait: { 2400 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2401 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2402 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2403 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2404 CGM.VoidPtrTy, 2405 CGM.Int32Ty, 2406 CGM.VoidPtrPtrTy, 2407 CGM.VoidPtrPtrTy, 2408 CGM.Int64Ty->getPointerTo(), 2409 CGM.Int64Ty->getPointerTo(), 2410 CGM.Int32Ty, 2411 CGM.Int32Ty}; 2412 auto *FnTy = 2413 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2414 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2415 break; 2416 } 2417 case OMPRTL__tgt_register_requires: { 2418 // Build void __tgt_register_requires(int64_t flags); 2419 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2420 auto *FnTy = 2421 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2422 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2423 
break; 2424 } 2425 case OMPRTL__tgt_register_lib: { 2426 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2427 QualType ParamTy = 2428 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2429 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2430 auto *FnTy = 2431 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2432 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2433 break; 2434 } 2435 case OMPRTL__tgt_unregister_lib: { 2436 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2437 QualType ParamTy = 2438 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2439 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2440 auto *FnTy = 2441 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2442 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2443 break; 2444 } 2445 case OMPRTL__tgt_target_data_begin: { 2446 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2447 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2448 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2449 CGM.Int32Ty, 2450 CGM.VoidPtrPtrTy, 2451 CGM.VoidPtrPtrTy, 2452 CGM.Int64Ty->getPointerTo(), 2453 CGM.Int64Ty->getPointerTo()}; 2454 auto *FnTy = 2455 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2456 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2457 break; 2458 } 2459 case OMPRTL__tgt_target_data_begin_nowait: { 2460 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2461 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2462 // *arg_types); 2463 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2464 CGM.Int32Ty, 2465 CGM.VoidPtrPtrTy, 2466 CGM.VoidPtrPtrTy, 2467 CGM.Int64Ty->getPointerTo(), 2468 CGM.Int64Ty->getPointerTo()}; 2469 auto *FnTy = 2470 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2471 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2472 break; 2473 } 2474 case OMPRTL__tgt_target_data_end: { 2475 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2476 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2477 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2478 CGM.Int32Ty, 2479 CGM.VoidPtrPtrTy, 2480 CGM.VoidPtrPtrTy, 2481 CGM.Int64Ty->getPointerTo(), 2482 CGM.Int64Ty->getPointerTo()}; 2483 auto *FnTy = 2484 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2485 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2486 break; 2487 } 2488 case OMPRTL__tgt_target_data_end_nowait: { 2489 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2490 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2491 // *arg_types); 2492 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2493 CGM.Int32Ty, 2494 CGM.VoidPtrPtrTy, 2495 CGM.VoidPtrPtrTy, 2496 CGM.Int64Ty->getPointerTo(), 2497 CGM.Int64Ty->getPointerTo()}; 2498 auto *FnTy = 2499 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2500 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2501 break; 2502 } 2503 case OMPRTL__tgt_target_data_update: { 2504 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2505 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2506 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2507 CGM.Int32Ty, 2508 CGM.VoidPtrPtrTy, 2509 CGM.VoidPtrPtrTy, 2510 CGM.Int64Ty->getPointerTo(), 2511 CGM.Int64Ty->getPointerTo()}; 2512 auto *FnTy = 2513 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2514 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2515 break; 2516 } 2517 case OMPRTL__tgt_target_data_update_nowait: { 2518 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2519 // arg_num, void** args_base, void **args, int64_t *arg_sizes, 
int64_t 2520 // *arg_types); 2521 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2522 CGM.Int32Ty, 2523 CGM.VoidPtrPtrTy, 2524 CGM.VoidPtrPtrTy, 2525 CGM.Int64Ty->getPointerTo(), 2526 CGM.Int64Ty->getPointerTo()}; 2527 auto *FnTy = 2528 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2529 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2530 break; 2531 } 2532 case OMPRTL__tgt_mapper_num_components: { 2533 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2534 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2535 auto *FnTy = 2536 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2537 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2538 break; 2539 } 2540 case OMPRTL__tgt_push_mapper_component: { 2541 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2542 // *base, void *begin, int64_t size, int64_t type); 2543 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2544 CGM.Int64Ty, CGM.Int64Ty}; 2545 auto *FnTy = 2546 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2547 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2548 break; 2549 } 2550 } 2551 assert(RTLFn && "Unable to find OpenMP runtime function"); 2552 return RTLFn; 2553 } 2554 2555 llvm::FunctionCallee 2556 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2557 assert((IVSize == 32 || IVSize == 64) && 2558 "IV size is not compatible with the omp runtime"); 2559 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2560 : "__kmpc_for_static_init_4u") 2561 : (IVSigned ? "__kmpc_for_static_init_8" 2562 : "__kmpc_for_static_init_8u"); 2563 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2564 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2565 llvm::Type *TypeParams[] = { 2566 getIdentTyPointerTy(), // loc 2567 CGM.Int32Ty, // tid 2568 CGM.Int32Ty, // schedtype 2569 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2570 PtrTy, // p_lower 2571 PtrTy, // p_upper 2572 PtrTy, // p_stride 2573 ITy, // incr 2574 ITy // chunk 2575 }; 2576 auto *FnTy = 2577 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2578 return CGM.CreateRuntimeFunction(FnTy, Name); 2579 } 2580 2581 llvm::FunctionCallee 2582 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2583 assert((IVSize == 32 || IVSize == 64) && 2584 "IV size is not compatible with the omp runtime"); 2585 StringRef Name = 2586 IVSize == 32 2587 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2588 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2589 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2590 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2591 CGM.Int32Ty, // tid 2592 CGM.Int32Ty, // schedtype 2593 ITy, // lower 2594 ITy, // upper 2595 ITy, // stride 2596 ITy // chunk 2597 }; 2598 auto *FnTy = 2599 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2600 return CGM.CreateRuntimeFunction(FnTy, Name); 2601 } 2602 2603 llvm::FunctionCallee 2604 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2605 assert((IVSize == 32 || IVSize == 64) && 2606 "IV size is not compatible with the omp runtime"); 2607 StringRef Name = 2608 IVSize == 32 2609 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2610 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2611 llvm::Type *TypeParams[] = { 2612 getIdentTyPointerTy(), // loc 2613 CGM.Int32Ty, // tid 2614 }; 2615 auto *FnTy = 2616 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2617 return CGM.CreateRuntimeFunction(FnTy, Name); 2618 } 2619 2620 llvm::FunctionCallee 2621 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2622 assert((IVSize == 32 || IVSize == 64) && 2623 "IV size is not compatible with the omp runtime"); 2624 StringRef Name = 2625 IVSize == 32 2626 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2627 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2628 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2629 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2630 llvm::Type *TypeParams[] = { 2631 getIdentTyPointerTy(), // loc 2632 CGM.Int32Ty, // tid 2633 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2634 PtrTy, // p_lower 2635 PtrTy, // p_upper 2636 PtrTy // p_stride 2637 }; 2638 auto *FnTy = 2639 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2640 return CGM.CreateRuntimeFunction(FnTy, Name); 2641 } 2642 2643 /// Obtain information that uniquely identifies a target entry. This 2644 /// consists of the file and device IDs as well as line number associated with 2645 /// the relevant entry source location. 
2646 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2647 unsigned &DeviceID, unsigned &FileID, 2648 unsigned &LineNum) { 2649 SourceManager &SM = C.getSourceManager(); 2650 2651 // The loc should be always valid and have a file ID (the user cannot use 2652 // #pragma directives in macros) 2653 2654 assert(Loc.isValid() && "Source location is expected to be always valid."); 2655 2656 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2657 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2658 2659 llvm::sys::fs::UniqueID ID; 2660 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2661 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2662 << PLoc.getFilename() << EC.message(); 2663 2664 DeviceID = ID.getDevice(); 2665 FileID = ID.getFile(); 2666 LineNum = PLoc.getLine(); 2667 } 2668 2669 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2670 if (CGM.getLangOpts().OpenMPSimd) 2671 return Address::invalid(); 2672 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2673 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2674 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2675 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2676 HasRequiresUnifiedSharedMemory))) { 2677 SmallString<64> PtrName; 2678 { 2679 llvm::raw_svector_ostream OS(PtrName); 2680 OS << CGM.getMangledName(GlobalDecl(VD)); 2681 if (!VD->isExternallyVisible()) { 2682 unsigned DeviceID, FileID, Line; 2683 getTargetEntryUniqueInfo(CGM.getContext(), 2684 VD->getCanonicalDecl()->getBeginLoc(), 2685 DeviceID, FileID, Line); 2686 OS << llvm::format("_%x", FileID); 2687 } 2688 OS << "_decl_tgt_ref_ptr"; 2689 } 2690 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2691 if (!Ptr) { 2692 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2693 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2694 PtrName); 2695 2696 auto *GV = cast<llvm::GlobalVariable>(Ptr); 
2697 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2698 2699 if (!CGM.getLangOpts().OpenMPIsDevice) 2700 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2701 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2702 } 2703 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2704 } 2705 return Address::invalid(); 2706 } 2707 2708 llvm::Constant * 2709 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2710 assert(!CGM.getLangOpts().OpenMPUseTLS || 2711 !CGM.getContext().getTargetInfo().isTLSSupported()); 2712 // Lookup the entry, lazily creating it if necessary. 2713 std::string Suffix = getName({"cache", ""}); 2714 return getOrCreateInternalVariable( 2715 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2716 } 2717 2718 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2719 const VarDecl *VD, 2720 Address VDAddr, 2721 SourceLocation Loc) { 2722 if (CGM.getLangOpts().OpenMPUseTLS && 2723 CGM.getContext().getTargetInfo().isTLSSupported()) 2724 return VDAddr; 2725 2726 llvm::Type *VarTy = VDAddr.getElementType(); 2727 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2728 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2729 CGM.Int8PtrTy), 2730 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2731 getOrCreateThreadPrivateCache(VD)}; 2732 return Address(CGF.EmitRuntimeCall( 2733 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2734 VDAddr.getAlignment()); 2735 } 2736 2737 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2738 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2739 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2740 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2741 // library. 
2742 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2743 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2744 OMPLoc); 2745 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2746 // to register constructor/destructor for variable. 2747 llvm::Value *Args[] = { 2748 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2749 Ctor, CopyCtor, Dtor}; 2750 CGF.EmitRuntimeCall( 2751 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2752 } 2753 2754 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2755 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2756 bool PerformInit, CodeGenFunction *CGF) { 2757 if (CGM.getLangOpts().OpenMPUseTLS && 2758 CGM.getContext().getTargetInfo().isTLSSupported()) 2759 return nullptr; 2760 2761 VD = VD->getDefinition(CGM.getContext()); 2762 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2763 QualType ASTTy = VD->getType(); 2764 2765 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2766 const Expr *Init = VD->getAnyInitializer(); 2767 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2768 // Generate function that re-emits the declaration's initializer into the 2769 // threadprivate copy of the variable VD 2770 CodeGenFunction CtorCGF(CGM); 2771 FunctionArgList Args; 2772 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2773 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2774 ImplicitParamDecl::Other); 2775 Args.push_back(&Dst); 2776 2777 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2778 CGM.getContext().VoidPtrTy, Args); 2779 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2780 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2781 llvm::Function *Fn = 2782 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2783 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2784 Args, Loc, Loc); 2785 
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2786 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2787 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2788 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2789 Arg = CtorCGF.Builder.CreateElementBitCast( 2790 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2791 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2792 /*IsInitializer=*/true); 2793 ArgVal = CtorCGF.EmitLoadOfScalar( 2794 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2795 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2796 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2797 CtorCGF.FinishFunction(); 2798 Ctor = Fn; 2799 } 2800 if (VD->getType().isDestructedType() != QualType::DK_none) { 2801 // Generate function that emits destructor call for the threadprivate copy 2802 // of the variable VD 2803 CodeGenFunction DtorCGF(CGM); 2804 FunctionArgList Args; 2805 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2806 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2807 ImplicitParamDecl::Other); 2808 Args.push_back(&Dst); 2809 2810 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2811 CGM.getContext().VoidTy, Args); 2812 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2813 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2814 llvm::Function *Fn = 2815 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2816 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2817 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2818 Loc, Loc); 2819 // Create a scope with an artificial location for the body of this function. 
2820 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2821 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2822 DtorCGF.GetAddrOfLocalVar(&Dst), 2823 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2824 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2825 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2826 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2827 DtorCGF.FinishFunction(); 2828 Dtor = Fn; 2829 } 2830 // Do not emit init function if it is not required. 2831 if (!Ctor && !Dtor) 2832 return nullptr; 2833 2834 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2835 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2836 /*isVarArg=*/false) 2837 ->getPointerTo(); 2838 // Copying constructor for the threadprivate variable. 2839 // Must be NULL - reserved by runtime, but currently it requires that this 2840 // parameter is always NULL. Otherwise it fires assertion. 2841 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2842 if (Ctor == nullptr) { 2843 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2844 /*isVarArg=*/false) 2845 ->getPointerTo(); 2846 Ctor = llvm::Constant::getNullValue(CtorTy); 2847 } 2848 if (Dtor == nullptr) { 2849 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2850 /*isVarArg=*/false) 2851 ->getPointerTo(); 2852 Dtor = llvm::Constant::getNullValue(DtorTy); 2853 } 2854 if (!CGF) { 2855 auto *InitFunctionTy = 2856 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2857 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2858 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2859 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2860 CodeGenFunction InitCGF(CGM); 2861 FunctionArgList ArgList; 2862 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2863 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2864 Loc, Loc); 2865 
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2866 InitCGF.FinishFunction(); 2867 return InitFunction; 2868 } 2869 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2870 } 2871 return nullptr; 2872 } 2873 2874 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2875 llvm::GlobalVariable *Addr, 2876 bool PerformInit) { 2877 if (CGM.getLangOpts().OMPTargetTriples.empty() && 2878 !CGM.getLangOpts().OpenMPIsDevice) 2879 return false; 2880 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2881 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2882 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2883 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2884 HasRequiresUnifiedSharedMemory)) 2885 return CGM.getLangOpts().OpenMPIsDevice; 2886 VD = VD->getDefinition(CGM.getContext()); 2887 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2888 return CGM.getLangOpts().OpenMPIsDevice; 2889 2890 QualType ASTTy = VD->getType(); 2891 2892 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2893 // Produce the unique prefix to identify the new target regions. We use 2894 // the source location of the variable declaration which we know to not 2895 // conflict with any target region. 
2896 unsigned DeviceID; 2897 unsigned FileID; 2898 unsigned Line; 2899 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2900 SmallString<128> Buffer, Out; 2901 { 2902 llvm::raw_svector_ostream OS(Buffer); 2903 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2904 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2905 } 2906 2907 const Expr *Init = VD->getAnyInitializer(); 2908 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2909 llvm::Constant *Ctor; 2910 llvm::Constant *ID; 2911 if (CGM.getLangOpts().OpenMPIsDevice) { 2912 // Generate function that re-emits the declaration's initializer into 2913 // the threadprivate copy of the variable VD 2914 CodeGenFunction CtorCGF(CGM); 2915 2916 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2917 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2918 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2919 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2920 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2921 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2922 FunctionArgList(), Loc, Loc); 2923 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2924 CtorCGF.EmitAnyExprToMem(Init, 2925 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2926 Init->getType().getQualifiers(), 2927 /*IsInitializer=*/true); 2928 CtorCGF.FinishFunction(); 2929 Ctor = Fn; 2930 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2931 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2932 } else { 2933 Ctor = new llvm::GlobalVariable( 2934 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2935 llvm::GlobalValue::PrivateLinkage, 2936 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2937 ID = Ctor; 2938 } 2939 2940 // Register the information for the entry associated with the constructor. 
2941 Out.clear(); 2942 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2943 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2944 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2945 } 2946 if (VD->getType().isDestructedType() != QualType::DK_none) { 2947 llvm::Constant *Dtor; 2948 llvm::Constant *ID; 2949 if (CGM.getLangOpts().OpenMPIsDevice) { 2950 // Generate function that emits destructor call for the threadprivate 2951 // copy of the variable VD 2952 CodeGenFunction DtorCGF(CGM); 2953 2954 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2955 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2956 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2957 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2958 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2959 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2960 FunctionArgList(), Loc, Loc); 2961 // Create a scope with an artificial location for the body of this 2962 // function. 2963 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2964 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2965 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2966 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2967 DtorCGF.FinishFunction(); 2968 Dtor = Fn; 2969 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2970 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2971 } else { 2972 Dtor = new llvm::GlobalVariable( 2973 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2974 llvm::GlobalValue::PrivateLinkage, 2975 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2976 ID = Dtor; 2977 } 2978 // Register the information for the entry associated with the destructor. 
2979 Out.clear(); 2980 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2981 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2982 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2983 } 2984 return CGM.getLangOpts().OpenMPIsDevice; 2985 } 2986 2987 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2988 QualType VarType, 2989 StringRef Name) { 2990 std::string Suffix = getName({"artificial", ""}); 2991 std::string CacheSuffix = getName({"cache", ""}); 2992 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2993 llvm::Value *GAddr = 2994 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2995 llvm::Value *Args[] = { 2996 emitUpdateLocation(CGF, SourceLocation()), 2997 getThreadID(CGF, SourceLocation()), 2998 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2999 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 3000 /*isSigned=*/false), 3001 getOrCreateInternalVariable( 3002 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 3003 return Address( 3004 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3005 CGF.EmitRuntimeCall( 3006 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 3007 VarLVType->getPointerTo(/*AddrSpace=*/0)), 3008 CGM.getPointerAlign()); 3009 } 3010 3011 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 3012 const RegionCodeGenTy &ThenGen, 3013 const RegionCodeGenTy &ElseGen) { 3014 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 3015 3016 // If the condition constant folds and can be elided, try to avoid emitting 3017 // the condition and the dead arm of the if/else. 3018 bool CondConstant; 3019 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 3020 if (CondConstant) 3021 ThenGen(CGF); 3022 else 3023 ElseGen(CGF); 3024 return; 3025 } 3026 3027 // Otherwise, the condition did not fold, or we couldn't elide it. 
Just 3028 // emit the conditional branch. 3029 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3030 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 3031 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 3032 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 3033 3034 // Emit the 'then' code. 3035 CGF.EmitBlock(ThenBlock); 3036 ThenGen(CGF); 3037 CGF.EmitBranch(ContBlock); 3038 // Emit the 'else' code if present. 3039 // There is no need to emit line number for unconditional branch. 3040 (void)ApplyDebugLocation::CreateEmpty(CGF); 3041 CGF.EmitBlock(ElseBlock); 3042 ElseGen(CGF); 3043 // There is no need to emit line number for unconditional branch. 3044 (void)ApplyDebugLocation::CreateEmpty(CGF); 3045 CGF.EmitBranch(ContBlock); 3046 // Emit the continuation block for code after the if. 3047 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 3048 } 3049 3050 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 3051 llvm::Function *OutlinedFn, 3052 ArrayRef<llvm::Value *> CapturedVars, 3053 const Expr *IfCond) { 3054 if (!CGF.HaveInsertPoint()) 3055 return; 3056 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 3057 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 3058 PrePostActionTy &) { 3059 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 3060 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3061 llvm::Value *Args[] = { 3062 RTLoc, 3063 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3064 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3065 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3066 RealArgs.append(std::begin(Args), std::end(Args)); 3067 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3068 3069 llvm::FunctionCallee RTLFn = 3070 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3071 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3072 }; 3073 auto &&ElseGen = [OutlinedFn, 
CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3074 PrePostActionTy &) { 3075 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3076 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3077 // Build calls: 3078 // __kmpc_serialized_parallel(&Loc, GTid); 3079 llvm::Value *Args[] = {RTLoc, ThreadID}; 3080 CGF.EmitRuntimeCall( 3081 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3082 3083 // OutlinedFn(>id, &zero_bound, CapturedStruct); 3084 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 3085 Address ZeroAddrBound = 3086 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3087 /*Name=*/".bound.zero.addr"); 3088 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 3089 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3090 // ThreadId for serialized parallels is 0. 3091 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 3092 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 3093 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3094 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3095 3096 // __kmpc_end_serialized_parallel(&Loc, GTid); 3097 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3098 CGF.EmitRuntimeCall( 3099 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3100 EndArgs); 3101 }; 3102 if (IfCond) { 3103 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 3104 } else { 3105 RegionCodeGenTy ThenRCG(ThenGen); 3106 ThenRCG(CGF); 3107 } 3108 } 3109 3110 // If we're inside an (outlined) parallel region, use the region info's 3111 // thread-ID variable (it is passed in a first argument of the outlined function 3112 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3113 // regular serial code region, get thread ID by calling kmp_int32 3114 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3115 // return the address of that temp. 
3116 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3117 SourceLocation Loc) { 3118 if (auto *OMPRegionInfo = 3119 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3120 if (OMPRegionInfo->getThreadIDVariable()) 3121 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 3122 3123 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3124 QualType Int32Ty = 3125 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3126 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3127 CGF.EmitStoreOfScalar(ThreadID, 3128 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3129 3130 return ThreadIDTemp; 3131 } 3132 3133 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3134 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3135 SmallString<256> Buffer; 3136 llvm::raw_svector_ostream Out(Buffer); 3137 Out << Name; 3138 StringRef RuntimeName = Out.str(); 3139 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3140 if (Elem.second) { 3141 assert(Elem.second->getType()->getPointerElementType() == Ty && 3142 "OMP internal variable has different type than requested"); 3143 return &*Elem.second; 3144 } 3145 3146 return Elem.second = new llvm::GlobalVariable( 3147 CGM.getModule(), Ty, /*IsConstant*/ false, 3148 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3149 Elem.first(), /*InsertBefore=*/nullptr, 3150 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3151 } 3152 3153 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3154 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3155 std::string Name = getName({Prefix, "var"}); 3156 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3157 } 3158 3159 namespace { 3160 /// Common pre(post)-action for different OpenMP constructs. 
3161 class CommonActionTy final : public PrePostActionTy { 3162 llvm::FunctionCallee EnterCallee; 3163 ArrayRef<llvm::Value *> EnterArgs; 3164 llvm::FunctionCallee ExitCallee; 3165 ArrayRef<llvm::Value *> ExitArgs; 3166 bool Conditional; 3167 llvm::BasicBlock *ContBlock = nullptr; 3168 3169 public: 3170 CommonActionTy(llvm::FunctionCallee EnterCallee, 3171 ArrayRef<llvm::Value *> EnterArgs, 3172 llvm::FunctionCallee ExitCallee, 3173 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3174 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3175 ExitArgs(ExitArgs), Conditional(Conditional) {} 3176 void Enter(CodeGenFunction &CGF) override { 3177 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3178 if (Conditional) { 3179 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3180 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3181 ContBlock = CGF.createBasicBlock("omp_if.end"); 3182 // Generate the branch (If-stmt) 3183 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3184 CGF.EmitBlock(ThenBlock); 3185 } 3186 } 3187 void Done(CodeGenFunction &CGF) { 3188 // Emit the rest of blocks/branches 3189 CGF.EmitBranch(ContBlock); 3190 CGF.EmitBlock(ContBlock, true); 3191 } 3192 void Exit(CodeGenFunction &CGF) override { 3193 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3194 } 3195 }; 3196 } // anonymous namespace 3197 3198 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3199 StringRef CriticalName, 3200 const RegionCodeGenTy &CriticalOpGen, 3201 SourceLocation Loc, const Expr *Hint) { 3202 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3203 // CriticalOpGen(); 3204 // __kmpc_end_critical(ident_t *, gtid, Lock); 3205 // Prepare arguments and build a call to __kmpc_critical 3206 if (!CGF.HaveInsertPoint()) 3207 return; 3208 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3209 getCriticalRegionLock(CriticalName)}; 3210 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3211 std::end(Args)); 3212 if (Hint) { 3213 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3214 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3215 } 3216 CommonActionTy Action( 3217 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3218 : OMPRTL__kmpc_critical), 3219 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3220 CriticalOpGen.setAction(Action); 3221 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3222 } 3223 3224 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3225 const RegionCodeGenTy &MasterOpGen, 3226 SourceLocation Loc) { 3227 if (!CGF.HaveInsertPoint()) 3228 return; 3229 // if(__kmpc_master(ident_t *, gtid)) { 3230 // MasterOpGen(); 3231 // __kmpc_end_master(ident_t *, gtid); 3232 // } 3233 // Prepare arguments and build a call to __kmpc_master 3234 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3235 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3236 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3237 /*Conditional=*/true); 3238 MasterOpGen.setAction(Action); 3239 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3240 Action.Done(CGF); 3241 } 3242 3243 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3244 SourceLocation Loc) { 3245 if (!CGF.HaveInsertPoint()) 3246 return; 3247 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3248 llvm::Value *Args[] = { 3249 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3250 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3251 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 3252 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3253 Region->emitUntiedSwitch(CGF); 3254 } 3255 3256 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3257 const RegionCodeGenTy &TaskgroupOpGen, 3258 SourceLocation Loc) { 3259 if 
(!CGF.HaveInsertPoint()) 3260 return; 3261 // __kmpc_taskgroup(ident_t *, gtid); 3262 // TaskgroupOpGen(); 3263 // __kmpc_end_taskgroup(ident_t *, gtid); 3264 // Prepare arguments and build a call to __kmpc_taskgroup 3265 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3266 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3267 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3268 Args); 3269 TaskgroupOpGen.setAction(Action); 3270 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3271 } 3272 3273 /// Given an array of pointers to variables, project the address of a 3274 /// given variable. 3275 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3276 unsigned Index, const VarDecl *Var) { 3277 // Pull out the pointer to the variable. 3278 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3279 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3280 3281 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3282 Addr = CGF.Builder.CreateElementBitCast( 3283 Addr, CGF.ConvertTypeForMem(Var->getType())); 3284 return Addr; 3285 } 3286 3287 static llvm::Value *emitCopyprivateCopyFunction( 3288 CodeGenModule &CGM, llvm::Type *ArgsType, 3289 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3290 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3291 SourceLocation Loc) { 3292 ASTContext &C = CGM.getContext(); 3293 // void copy_func(void *LHSArg, void *RHSArg); 3294 FunctionArgList Args; 3295 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3296 ImplicitParamDecl::Other); 3297 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3298 ImplicitParamDecl::Other); 3299 Args.push_back(&LHSArg); 3300 Args.push_back(&RHSArg); 3301 const auto &CGFI = 3302 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3303 std::string Name = 3304 
CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3305 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3306 llvm::GlobalValue::InternalLinkage, Name, 3307 &CGM.getModule()); 3308 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3309 Fn->setDoesNotRecurse(); 3310 CodeGenFunction CGF(CGM); 3311 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3312 // Dest = (void*[n])(LHSArg); 3313 // Src = (void*[n])(RHSArg); 3314 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3315 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3316 ArgsType), CGF.getPointerAlign()); 3317 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3318 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3319 ArgsType), CGF.getPointerAlign()); 3320 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3321 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3322 // ... 3323 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3324 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3325 const auto *DestVar = 3326 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3327 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3328 3329 const auto *SrcVar = 3330 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3331 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3332 3333 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3334 QualType Type = VD->getType(); 3335 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3336 } 3337 CGF.FinishFunction(); 3338 return Fn; 3339 } 3340 3341 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3342 const RegionCodeGenTy &SingleOpGen, 3343 SourceLocation Loc, 3344 ArrayRef<const Expr *> CopyprivateVars, 3345 ArrayRef<const Expr *> SrcExprs, 3346 ArrayRef<const Expr *> DstExprs, 3347 ArrayRef<const Expr *> AssignmentOps) { 3348 if (!CGF.HaveInsertPoint()) 3349 return; 3350 assert(CopyprivateVars.size() == 
SrcExprs.size() && 3351 CopyprivateVars.size() == DstExprs.size() && 3352 CopyprivateVars.size() == AssignmentOps.size()); 3353 ASTContext &C = CGM.getContext(); 3354 // int32 did_it = 0; 3355 // if(__kmpc_single(ident_t *, gtid)) { 3356 // SingleOpGen(); 3357 // __kmpc_end_single(ident_t *, gtid); 3358 // did_it = 1; 3359 // } 3360 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3361 // <copy_func>, did_it); 3362 3363 Address DidIt = Address::invalid(); 3364 if (!CopyprivateVars.empty()) { 3365 // int32 did_it = 0; 3366 QualType KmpInt32Ty = 3367 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3368 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3369 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3370 } 3371 // Prepare arguments and build a call to __kmpc_single 3372 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3373 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3374 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3375 /*Conditional=*/true); 3376 SingleOpGen.setAction(Action); 3377 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3378 if (DidIt.isValid()) { 3379 // did_it = 1; 3380 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3381 } 3382 Action.Done(CGF); 3383 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3384 // <copy_func>, did_it); 3385 if (DidIt.isValid()) { 3386 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3387 QualType CopyprivateArrayTy = C.getConstantArrayType( 3388 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 3389 /*IndexTypeQuals=*/0); 3390 // Create a list of all private variables for copyprivate. 
3391 Address CopyprivateList = 3392 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3393 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3394 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3395 CGF.Builder.CreateStore( 3396 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3397 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 3398 Elem); 3399 } 3400 // Build function that copies private values from single region to all other 3401 // threads in the corresponding parallel region. 3402 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3403 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3404 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3405 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3406 Address CL = 3407 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3408 CGF.VoidPtrTy); 3409 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3410 llvm::Value *Args[] = { 3411 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3412 getThreadID(CGF, Loc), // i32 <gtid> 3413 BufSize, // size_t <buf_size> 3414 CL.getPointer(), // void *<copyprivate list> 3415 CpyFn, // void (*) (void *, void *) <copy_func> 3416 DidItVal // i32 did_it 3417 }; 3418 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3419 } 3420 } 3421 3422 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3423 const RegionCodeGenTy &OrderedOpGen, 3424 SourceLocation Loc, bool IsThreads) { 3425 if (!CGF.HaveInsertPoint()) 3426 return; 3427 // __kmpc_ordered(ident_t *, gtid); 3428 // OrderedOpGen(); 3429 // __kmpc_end_ordered(ident_t *, gtid); 3430 // Prepare arguments and build a call to __kmpc_ordered 3431 if (IsThreads) { 3432 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3433 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3434 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3435 Args); 3436 
OrderedOpGen.setAction(Action); 3437 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3438 return; 3439 } 3440 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3441 } 3442 3443 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3444 unsigned Flags; 3445 if (Kind == OMPD_for) 3446 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3447 else if (Kind == OMPD_sections) 3448 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3449 else if (Kind == OMPD_single) 3450 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3451 else if (Kind == OMPD_barrier) 3452 Flags = OMP_IDENT_BARRIER_EXPL; 3453 else 3454 Flags = OMP_IDENT_BARRIER_IMPL; 3455 return Flags; 3456 } 3457 3458 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3459 CodeGenFunction &CGF, const OMPLoopDirective &S, 3460 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3461 // Check if the loop directive is actually a doacross loop directive. In this 3462 // case choose static, 1 schedule. 3463 if (llvm::any_of( 3464 S.getClausesOfKind<OMPOrderedClause>(), 3465 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3466 ScheduleKind = OMPC_SCHEDULE_static; 3467 // Chunk size is 1 in this case. 
3468 llvm::APInt ChunkSize(32, 1); 3469 ChunkExpr = IntegerLiteral::Create( 3470 CGF.getContext(), ChunkSize, 3471 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3472 SourceLocation()); 3473 } 3474 } 3475 3476 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3477 OpenMPDirectiveKind Kind, bool EmitChecks, 3478 bool ForceSimpleCall) { 3479 if (!CGF.HaveInsertPoint()) 3480 return; 3481 // Build call __kmpc_cancel_barrier(loc, thread_id); 3482 // Build call __kmpc_barrier(loc, thread_id); 3483 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3484 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3485 // thread_id); 3486 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3487 getThreadID(CGF, Loc)}; 3488 if (auto *OMPRegionInfo = 3489 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3490 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3491 llvm::Value *Result = CGF.EmitRuntimeCall( 3492 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3493 if (EmitChecks) { 3494 // if (__kmpc_cancel_barrier()) { 3495 // exit from construct; 3496 // } 3497 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3498 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3499 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3500 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3501 CGF.EmitBlock(ExitBB); 3502 // exit from construct; 3503 CodeGenFunction::JumpDest CancelDestination = 3504 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3505 CGF.EmitBranchThroughCleanup(CancelDestination); 3506 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3507 } 3508 return; 3509 } 3510 } 3511 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3512 } 3513 3514 /// Map the OpenMP loop schedule to the runtime enumeration. 
3515 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3516 bool Chunked, bool Ordered) { 3517 switch (ScheduleKind) { 3518 case OMPC_SCHEDULE_static: 3519 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3520 : (Ordered ? OMP_ord_static : OMP_sch_static); 3521 case OMPC_SCHEDULE_dynamic: 3522 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3523 case OMPC_SCHEDULE_guided: 3524 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3525 case OMPC_SCHEDULE_runtime: 3526 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3527 case OMPC_SCHEDULE_auto: 3528 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3529 case OMPC_SCHEDULE_unknown: 3530 assert(!Chunked && "chunk was specified but schedule kind not known"); 3531 return Ordered ? OMP_ord_static : OMP_sch_static; 3532 } 3533 llvm_unreachable("Unexpected runtime schedule"); 3534 } 3535 3536 /// Map the OpenMP distribute schedule to the runtime enumeration. 3537 static OpenMPSchedType 3538 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3539 // only static is allowed for dist_schedule 3540 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3541 } 3542 3543 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3544 bool Chunked) const { 3545 OpenMPSchedType Schedule = 3546 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3547 return Schedule == OMP_sch_static; 3548 } 3549 3550 bool CGOpenMPRuntime::isStaticNonchunked( 3551 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3552 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3553 return Schedule == OMP_dist_sch_static; 3554 } 3555 3556 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3557 bool Chunked) const { 3558 OpenMPSchedType Schedule = 3559 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3560 return Schedule == OMP_sch_static_chunked; 3561 } 3562 3563 bool CGOpenMPRuntime::isStaticChunked( 3564 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3565 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3566 return Schedule == OMP_dist_sch_static_chunked; 3567 } 3568 3569 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3570 OpenMPSchedType Schedule = 3571 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3572 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3573 return Schedule != OMP_sch_static; 3574 } 3575 3576 static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, 3577 OpenMPScheduleClauseModifier M1, 3578 OpenMPScheduleClauseModifier M2) { 3579 int Modifier = 0; 3580 switch (M1) { 3581 case OMPC_SCHEDULE_MODIFIER_monotonic: 3582 Modifier = OMP_sch_modifier_monotonic; 3583 break; 3584 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3585 Modifier = OMP_sch_modifier_nonmonotonic; 3586 break; 3587 case OMPC_SCHEDULE_MODIFIER_simd: 3588 if (Schedule == OMP_sch_static_chunked) 3589 Schedule = OMP_sch_static_balanced_chunked; 3590 break; 3591 case 
OMPC_SCHEDULE_MODIFIER_last: 3592 case OMPC_SCHEDULE_MODIFIER_unknown: 3593 break; 3594 } 3595 switch (M2) { 3596 case OMPC_SCHEDULE_MODIFIER_monotonic: 3597 Modifier = OMP_sch_modifier_monotonic; 3598 break; 3599 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3600 Modifier = OMP_sch_modifier_nonmonotonic; 3601 break; 3602 case OMPC_SCHEDULE_MODIFIER_simd: 3603 if (Schedule == OMP_sch_static_chunked) 3604 Schedule = OMP_sch_static_balanced_chunked; 3605 break; 3606 case OMPC_SCHEDULE_MODIFIER_last: 3607 case OMPC_SCHEDULE_MODIFIER_unknown: 3608 break; 3609 } 3610 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. 3611 // If the static schedule kind is specified or if the ordered clause is 3612 // specified, and if the nonmonotonic modifier is not specified, the effect is 3613 // as if the monotonic modifier is specified. Otherwise, unless the monotonic 3614 // modifier is specified, the effect is as if the nonmonotonic modifier is 3615 // specified. 3616 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { 3617 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || 3618 Schedule == OMP_sch_static_balanced_chunked || 3619 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static)) 3620 Modifier = OMP_sch_modifier_nonmonotonic; 3621 } 3622 return Schedule | Modifier; 3623 } 3624 3625 void CGOpenMPRuntime::emitForDispatchInit( 3626 CodeGenFunction &CGF, SourceLocation Loc, 3627 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3628 bool Ordered, const DispatchRTInput &DispatchValues) { 3629 if (!CGF.HaveInsertPoint()) 3630 return; 3631 OpenMPSchedType Schedule = getRuntimeSchedule( 3632 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3633 assert(Ordered || 3634 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3635 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3636 Schedule != OMP_sch_static_balanced_chunked)); 3637 // Call __kmpc_dispatch_init( 3638 // 
ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3639 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3640 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3641 3642 // If the Chunk was not specified in the clause - use default value 1. 3643 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk 3644 : CGF.Builder.getIntN(IVSize, 1); 3645 llvm::Value *Args[] = { 3646 emitUpdateLocation(CGF, Loc), 3647 getThreadID(CGF, Loc), 3648 CGF.Builder.getInt32(addMonoNonMonoModifier( 3649 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3650 DispatchValues.LB, // Lower 3651 DispatchValues.UB, // Upper 3652 CGF.Builder.getIntN(IVSize, 1), // Stride 3653 Chunk // Chunk 3654 }; 3655 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3656 } 3657 3658 static void emitForStaticInitCall( 3659 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3660 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3661 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3662 const CGOpenMPRuntime::StaticRTInput &Values) { 3663 if (!CGF.HaveInsertPoint()) 3664 return; 3665 3666 assert(!Values.Ordered); 3667 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3668 Schedule == OMP_sch_static_balanced_chunked || 3669 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3670 Schedule == OMP_dist_sch_static || 3671 Schedule == OMP_dist_sch_static_chunked); 3672 3673 // Call __kmpc_for_static_init( 3674 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3675 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3676 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3677 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3678 llvm::Value *Chunk = Values.Chunk; 3679 if (Chunk == nullptr) { 3680 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3681 Schedule == OMP_dist_sch_static) && 3682 "expected static non-chunked schedule"); 3683 // If the Chunk 
was not specified in the clause - use default value 1. 3684 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3685 } else { 3686 assert((Schedule == OMP_sch_static_chunked || 3687 Schedule == OMP_sch_static_balanced_chunked || 3688 Schedule == OMP_ord_static_chunked || 3689 Schedule == OMP_dist_sch_static_chunked) && 3690 "expected static chunked schedule"); 3691 } 3692 llvm::Value *Args[] = { 3693 UpdateLocation, 3694 ThreadId, 3695 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, 3696 M2)), // Schedule type 3697 Values.IL.getPointer(), // &isLastIter 3698 Values.LB.getPointer(), // &LB 3699 Values.UB.getPointer(), // &UB 3700 Values.ST.getPointer(), // &Stride 3701 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3702 Chunk // Chunk 3703 }; 3704 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3705 } 3706 3707 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3708 SourceLocation Loc, 3709 OpenMPDirectiveKind DKind, 3710 const OpenMPScheduleTy &ScheduleKind, 3711 const StaticRTInput &Values) { 3712 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3713 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3714 assert(isOpenMPWorksharingDirective(DKind) && 3715 "Expected loop-based or sections-based directive."); 3716 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3717 isOpenMPLoopDirective(DKind) 3718 ? 
OMP_IDENT_WORK_LOOP 3719 : OMP_IDENT_WORK_SECTIONS); 3720 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3721 llvm::FunctionCallee StaticInitFunction = 3722 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3723 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3724 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3725 } 3726 3727 void CGOpenMPRuntime::emitDistributeStaticInit( 3728 CodeGenFunction &CGF, SourceLocation Loc, 3729 OpenMPDistScheduleClauseKind SchedKind, 3730 const CGOpenMPRuntime::StaticRTInput &Values) { 3731 OpenMPSchedType ScheduleNum = 3732 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3733 llvm::Value *UpdatedLocation = 3734 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3735 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3736 llvm::FunctionCallee StaticInitFunction = 3737 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3738 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3739 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3740 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3741 } 3742 3743 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3744 SourceLocation Loc, 3745 OpenMPDirectiveKind DKind) { 3746 if (!CGF.HaveInsertPoint()) 3747 return; 3748 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3749 llvm::Value *Args[] = { 3750 emitUpdateLocation(CGF, Loc, 3751 isOpenMPDistributeDirective(DKind) 3752 ? OMP_IDENT_WORK_DISTRIBUTE 3753 : isOpenMPLoopDirective(DKind) 3754 ? 
OMP_IDENT_WORK_LOOP 3755 : OMP_IDENT_WORK_SECTIONS), 3756 getThreadID(CGF, Loc)}; 3757 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3758 Args); 3759 } 3760 3761 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3762 SourceLocation Loc, 3763 unsigned IVSize, 3764 bool IVSigned) { 3765 if (!CGF.HaveInsertPoint()) 3766 return; 3767 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3768 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3769 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3770 } 3771 3772 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3773 SourceLocation Loc, unsigned IVSize, 3774 bool IVSigned, Address IL, 3775 Address LB, Address UB, 3776 Address ST) { 3777 // Call __kmpc_dispatch_next( 3778 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3779 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3780 // kmp_int[32|64] *p_stride); 3781 llvm::Value *Args[] = { 3782 emitUpdateLocation(CGF, Loc), 3783 getThreadID(CGF, Loc), 3784 IL.getPointer(), // &isLastIter 3785 LB.getPointer(), // &Lower 3786 UB.getPointer(), // &Upper 3787 ST.getPointer() // &Stride 3788 }; 3789 llvm::Value *Call = 3790 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3791 return CGF.EmitScalarConversion( 3792 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3793 CGF.getContext().BoolTy, Loc); 3794 } 3795 3796 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3797 llvm::Value *NumThreads, 3798 SourceLocation Loc) { 3799 if (!CGF.HaveInsertPoint()) 3800 return; 3801 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3802 llvm::Value *Args[] = { 3803 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3804 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3805 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3806 Args); 3807 
} 3808 3809 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3810 OpenMPProcBindClauseKind ProcBind, 3811 SourceLocation Loc) { 3812 if (!CGF.HaveInsertPoint()) 3813 return; 3814 // Constants for proc bind value accepted by the runtime. 3815 enum ProcBindTy { 3816 ProcBindFalse = 0, 3817 ProcBindTrue, 3818 ProcBindMaster, 3819 ProcBindClose, 3820 ProcBindSpread, 3821 ProcBindIntel, 3822 ProcBindDefault 3823 } RuntimeProcBind; 3824 switch (ProcBind) { 3825 case OMPC_PROC_BIND_master: 3826 RuntimeProcBind = ProcBindMaster; 3827 break; 3828 case OMPC_PROC_BIND_close: 3829 RuntimeProcBind = ProcBindClose; 3830 break; 3831 case OMPC_PROC_BIND_spread: 3832 RuntimeProcBind = ProcBindSpread; 3833 break; 3834 case OMPC_PROC_BIND_unknown: 3835 llvm_unreachable("Unsupported proc_bind value."); 3836 } 3837 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3838 llvm::Value *Args[] = { 3839 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3840 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3841 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3842 } 3843 3844 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3845 SourceLocation Loc) { 3846 if (!CGF.HaveInsertPoint()) 3847 return; 3848 // Build call void __kmpc_flush(ident_t *loc) 3849 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3850 emitUpdateLocation(CGF, Loc)); 3851 } 3852 3853 namespace { 3854 /// Indexes of fields for type kmp_task_t. 3855 enum KmpTaskTFields { 3856 /// List of shared variables. 3857 KmpTaskTShareds, 3858 /// Task routine. 3859 KmpTaskTRoutine, 3860 /// Partition id for the untied tasks. 3861 KmpTaskTPartId, 3862 /// Function with call of destructors for private variables. 3863 Data1, 3864 /// Task priority. 3865 Data2, 3866 /// (Taskloops only) Lower bound. 3867 KmpTaskTLowerBound, 3868 /// (Taskloops only) Upper bound. 
3869 KmpTaskTUpperBound, 3870 /// (Taskloops only) Stride. 3871 KmpTaskTStride, 3872 /// (Taskloops only) Is last iteration flag. 3873 KmpTaskTLastIter, 3874 /// (Taskloops only) Reduction data. 3875 KmpTaskTReductions, 3876 }; 3877 } // anonymous namespace 3878 3879 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3880 return OffloadEntriesTargetRegion.empty() && 3881 OffloadEntriesDeviceGlobalVar.empty(); 3882 } 3883 3884 /// Initialize target region entry. 3885 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3886 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3887 StringRef ParentName, unsigned LineNum, 3888 unsigned Order) { 3889 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3890 "only required for the device " 3891 "code generation."); 3892 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3893 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3894 OMPTargetRegionEntryTargetRegion); 3895 ++OffloadingEntriesNum; 3896 } 3897 3898 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3899 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3900 StringRef ParentName, unsigned LineNum, 3901 llvm::Constant *Addr, llvm::Constant *ID, 3902 OMPTargetRegionEntryKind Flags) { 3903 // If we are emitting code for a target, the entry is already initialized, 3904 // only has to be registered. 
3905 if (CGM.getLangOpts().OpenMPIsDevice) { 3906 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3907 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3908 DiagnosticsEngine::Error, 3909 "Unable to find target region on line '%0' in the device code."); 3910 CGM.getDiags().Report(DiagID) << LineNum; 3911 return; 3912 } 3913 auto &Entry = 3914 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3915 assert(Entry.isValid() && "Entry not initialized!"); 3916 Entry.setAddress(Addr); 3917 Entry.setID(ID); 3918 Entry.setFlags(Flags); 3919 } else { 3920 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3921 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3922 ++OffloadingEntriesNum; 3923 } 3924 } 3925 3926 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3927 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3928 unsigned LineNum) const { 3929 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3930 if (PerDevice == OffloadEntriesTargetRegion.end()) 3931 return false; 3932 auto PerFile = PerDevice->second.find(FileID); 3933 if (PerFile == PerDevice->second.end()) 3934 return false; 3935 auto PerParentName = PerFile->second.find(ParentName); 3936 if (PerParentName == PerFile->second.end()) 3937 return false; 3938 auto PerLine = PerParentName->second.find(LineNum); 3939 if (PerLine == PerParentName->second.end()) 3940 return false; 3941 // Fail if this entry is already registered. 3942 if (PerLine->second.getAddress() || PerLine->second.getID()) 3943 return false; 3944 return true; 3945 } 3946 3947 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3948 const OffloadTargetRegionEntryInfoActTy &Action) { 3949 // Scan all target region entries and perform the provided action. 
3950 for (const auto &D : OffloadEntriesTargetRegion) 3951 for (const auto &F : D.second) 3952 for (const auto &P : F.second) 3953 for (const auto &L : P.second) 3954 Action(D.first, F.first, P.first(), L.first, L.second); 3955 } 3956 3957 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3958 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3959 OMPTargetGlobalVarEntryKind Flags, 3960 unsigned Order) { 3961 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3962 "only required for the device " 3963 "code generation."); 3964 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3965 ++OffloadingEntriesNum; 3966 } 3967 3968 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3969 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3970 CharUnits VarSize, 3971 OMPTargetGlobalVarEntryKind Flags, 3972 llvm::GlobalValue::LinkageTypes Linkage) { 3973 if (CGM.getLangOpts().OpenMPIsDevice) { 3974 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3975 assert(Entry.isValid() && Entry.getFlags() == Flags && 3976 "Entry not initialized!"); 3977 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3978 "Resetting with the new address."); 3979 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3980 if (Entry.getVarSize().isZero()) { 3981 Entry.setVarSize(VarSize); 3982 Entry.setLinkage(Linkage); 3983 } 3984 return; 3985 } 3986 Entry.setVarSize(VarSize); 3987 Entry.setLinkage(Linkage); 3988 Entry.setAddress(Addr); 3989 } else { 3990 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3991 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3992 assert(Entry.isValid() && Entry.getFlags() == Flags && 3993 "Entry not initialized!"); 3994 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3995 "Resetting with the new address."); 3996 if (Entry.getVarSize().isZero()) { 3997 Entry.setVarSize(VarSize); 3998 Entry.setLinkage(Linkage); 3999 } 4000 return; 4001 } 4002 
OffloadEntriesDeviceGlobalVar.try_emplace( 4003 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 4004 ++OffloadingEntriesNum; 4005 } 4006 } 4007 4008 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 4009 actOnDeviceGlobalVarEntriesInfo( 4010 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 4011 // Scan all target region entries and perform the provided action. 4012 for (const auto &E : OffloadEntriesDeviceGlobalVar) 4013 Action(E.getKey(), E.getValue()); 4014 } 4015 4016 void CGOpenMPRuntime::createOffloadEntry( 4017 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4018 llvm::GlobalValue::LinkageTypes Linkage) { 4019 StringRef Name = Addr->getName(); 4020 llvm::Module &M = CGM.getModule(); 4021 llvm::LLVMContext &C = M.getContext(); 4022 4023 // Create constant string with the name. 4024 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4025 4026 std::string StringName = getName({"omp_offloading", "entry_name"}); 4027 auto *Str = new llvm::GlobalVariable( 4028 M, StrPtrInit->getType(), /*isConstant=*/true, 4029 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4030 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4031 4032 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4033 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4034 llvm::ConstantInt::get(CGM.SizeTy, Size), 4035 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4036 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4037 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4038 llvm::GlobalVariable *Entry = createGlobalStruct( 4039 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4040 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4041 4042 // The entry has to be created in the section the linker expects it to be. 
4043 Entry->setSection("omp_offloading_entries"); 4044 } 4045 4046 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4047 // Emit the offloading entries and metadata so that the device codegen side 4048 // can easily figure out what to emit. The produced metadata looks like 4049 // this: 4050 // 4051 // !omp_offload.info = !{!1, ...} 4052 // 4053 // Right now we only generate metadata for function that contain target 4054 // regions. 4055 4056 // If we are in simd mode or there are no entries, we don't need to do 4057 // anything. 4058 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 4059 return; 4060 4061 llvm::Module &M = CGM.getModule(); 4062 llvm::LLVMContext &C = M.getContext(); 4063 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 4064 SourceLocation, StringRef>, 4065 16> 4066 OrderedEntries(OffloadEntriesInfoManager.size()); 4067 llvm::SmallVector<StringRef, 16> ParentFunctions( 4068 OffloadEntriesInfoManager.size()); 4069 4070 // Auxiliary methods to create metadata values and strings. 4071 auto &&GetMDInt = [this](unsigned V) { 4072 return llvm::ConstantAsMetadata::get( 4073 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4074 }; 4075 4076 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4077 4078 // Create the offloading info metadata node. 4079 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4080 4081 // Create function that emits metadata for each target region entry; 4082 auto &&TargetRegionMetadataEmitter = 4083 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 4084 &GetMDString]( 4085 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4086 unsigned Line, 4087 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4088 // Generate metadata for target regions. Each entry of this metadata 4089 // contains: 4090 // - Entry 0 -> Kind of this type of metadata (0). 
4091 // - Entry 1 -> Device ID of the file where the entry was identified. 4092 // - Entry 2 -> File ID of the file where the entry was identified. 4093 // - Entry 3 -> Mangled name of the function where the entry was 4094 // identified. 4095 // - Entry 4 -> Line in the file where the entry was identified. 4096 // - Entry 5 -> Order the entry was created. 4097 // The first element of the metadata node is the kind. 4098 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4099 GetMDInt(FileID), GetMDString(ParentName), 4100 GetMDInt(Line), GetMDInt(E.getOrder())}; 4101 4102 SourceLocation Loc; 4103 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 4104 E = CGM.getContext().getSourceManager().fileinfo_end(); 4105 I != E; ++I) { 4106 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 4107 I->getFirst()->getUniqueID().getFile() == FileID) { 4108 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 4109 I->getFirst(), Line, 1); 4110 break; 4111 } 4112 } 4113 // Save this entry in the right position of the ordered entries array. 4114 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 4115 ParentFunctions[E.getOrder()] = ParentName; 4116 4117 // Add metadata to the named metadata node. 4118 MD->addOperand(llvm::MDNode::get(C, Ops)); 4119 }; 4120 4121 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4122 TargetRegionMetadataEmitter); 4123 4124 // Create function that emits metadata for each device global variable entry; 4125 auto &&DeviceGlobalVarMetadataEmitter = 4126 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4127 MD](StringRef MangledName, 4128 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4129 &E) { 4130 // Generate metadata for global variables. Each entry of this metadata 4131 // contains: 4132 // - Entry 0 -> Kind of this type of metadata (1). 4133 // - Entry 1 -> Mangled name of the variable. 4134 // - Entry 2 -> Declare target kind. 
4135 // - Entry 3 -> Order the entry was created. 4136 // The first element of the metadata node is the kind. 4137 llvm::Metadata *Ops[] = { 4138 GetMDInt(E.getKind()), GetMDString(MangledName), 4139 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4140 4141 // Save this entry in the right position of the ordered entries array. 4142 OrderedEntries[E.getOrder()] = 4143 std::make_tuple(&E, SourceLocation(), MangledName); 4144 4145 // Add metadata to the named metadata node. 4146 MD->addOperand(llvm::MDNode::get(C, Ops)); 4147 }; 4148 4149 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4150 DeviceGlobalVarMetadataEmitter); 4151 4152 for (const auto &E : OrderedEntries) { 4153 assert(std::get<0>(E) && "All ordered entries must exist!"); 4154 if (const auto *CE = 4155 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4156 std::get<0>(E))) { 4157 if (!CE->getID() || !CE->getAddress()) { 4158 // Do not blame the entry if the parent funtion is not emitted. 4159 StringRef FnName = ParentFunctions[CE->getOrder()]; 4160 if (!CGM.GetGlobalValue(FnName)) 4161 continue; 4162 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4163 DiagnosticsEngine::Error, 4164 "Offloading entry for target region in %0 is incorrect: either the " 4165 "address or the ID is invalid."); 4166 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 4167 continue; 4168 } 4169 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4170 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4171 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 4172 OffloadEntryInfoDeviceGlobalVar>( 4173 std::get<0>(E))) { 4174 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4175 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4176 CE->getFlags()); 4177 switch (Flags) { 4178 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4179 if (CGM.getLangOpts().OpenMPIsDevice && 4180 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4181 continue; 4182 if (!CE->getAddress()) { 4183 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4184 DiagnosticsEngine::Error, "Offloading entry for declare target " 4185 "variable %0 is incorrect: the " 4186 "address is invalid."); 4187 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 4188 continue; 4189 } 4190 // The vaiable has no definition - no need to add the entry. 4191 if (CE->getVarSize().isZero()) 4192 continue; 4193 break; 4194 } 4195 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4196 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4197 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4198 "Declaret target link address is set."); 4199 if (CGM.getLangOpts().OpenMPIsDevice) 4200 continue; 4201 if (!CE->getAddress()) { 4202 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4203 DiagnosticsEngine::Error, 4204 "Offloading entry for declare target variable is incorrect: the " 4205 "address is invalid."); 4206 CGM.getDiags().Report(DiagID); 4207 continue; 4208 } 4209 break; 4210 } 4211 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4212 CE->getVarSize().getQuantity(), Flags, 4213 CE->getLinkage()); 4214 } else { 4215 llvm_unreachable("Unsupported entry kind."); 4216 } 4217 } 4218 } 4219 4220 /// Loads all the offload entries information from the host IR 4221 /// metadata. 4222 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4223 // If we are in target mode, load the metadata from the host IR. This code has 4224 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4225 4226 if (!CGM.getLangOpts().OpenMPIsDevice) 4227 return; 4228 4229 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4230 return; 4231 4232 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4233 if (auto EC = Buf.getError()) { 4234 CGM.getDiags().Report(diag::err_cannot_open_file) 4235 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4236 return; 4237 } 4238 4239 llvm::LLVMContext C; 4240 auto ME = expectedToErrorOrAndEmitErrors( 4241 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4242 4243 if (auto EC = ME.getError()) { 4244 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4245 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4246 CGM.getDiags().Report(DiagID) 4247 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4248 return; 4249 } 4250 4251 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4252 if (!MD) 4253 return; 4254 4255 for (llvm::MDNode *MN : MD->operands()) { 4256 auto &&GetMDInt = [MN](unsigned Idx) { 4257 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4258 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4259 }; 4260 4261 auto &&GetMDString = [MN](unsigned Idx) { 4262 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4263 return V->getString(); 4264 }; 4265 4266 switch (GetMDInt(0)) { 4267 default: 4268 llvm_unreachable("Unexpected metadata!"); 4269 break; 4270 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4271 OffloadingEntryInfoTargetRegion: 4272 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4273 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4274 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4275 /*Order=*/GetMDInt(5)); 4276 break; 4277 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4278 OffloadingEntryInfoDeviceGlobalVar: 4279 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4280 /*MangledName=*/GetMDString(1), 4281 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4282 /*Flags=*/GetMDInt(2)), 4283 /*Order=*/GetMDInt(3)); 4284 break; 4285 } 4286 } 4287 } 4288 4289 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4290 if (!KmpRoutineEntryPtrTy) { 4291 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4292 ASTContext &C = CGM.getContext(); 4293 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4294 FunctionProtoType::ExtProtoInfo EPI; 4295 KmpRoutineEntryPtrQTy = C.getPointerType( 4296 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4297 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4298 } 4299 } 4300 4301 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4302 // Make sure the type of the entry is already created. This is the type we 4303 // have to create: 4304 // struct __tgt_offload_entry{ 4305 // void *addr; // Pointer to the offload entry info. 4306 // // (function or global) 4307 // char *name; // Name of the function or global. 4308 // size_t size; // Size of the entry info (0 if it a function). 4309 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4310 // int32_t reserved; // Reserved, to use by the runtime library. 
4311 // }; 4312 if (TgtOffloadEntryQTy.isNull()) { 4313 ASTContext &C = CGM.getContext(); 4314 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4315 RD->startDefinition(); 4316 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4317 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4318 addFieldToRecordDecl(C, RD, C.getSizeType()); 4319 addFieldToRecordDecl( 4320 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4321 addFieldToRecordDecl( 4322 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4323 RD->completeDefinition(); 4324 RD->addAttr(PackedAttr::CreateImplicit(C)); 4325 TgtOffloadEntryQTy = C.getRecordType(RD); 4326 } 4327 return TgtOffloadEntryQTy; 4328 } 4329 4330 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4331 // These are the types we need to build: 4332 // struct __tgt_device_image{ 4333 // void *ImageStart; // Pointer to the target code start. 4334 // void *ImageEnd; // Pointer to the target code end. 4335 // // We also add the host entries to the device image, as it may be useful 4336 // // for the target runtime to have access to that information. 4337 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4338 // // the entries. 4339 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4340 // // entries (non inclusive). 
4341 // }; 4342 if (TgtDeviceImageQTy.isNull()) { 4343 ASTContext &C = CGM.getContext(); 4344 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4345 RD->startDefinition(); 4346 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4347 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4348 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4349 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4350 RD->completeDefinition(); 4351 TgtDeviceImageQTy = C.getRecordType(RD); 4352 } 4353 return TgtDeviceImageQTy; 4354 } 4355 4356 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4357 // struct __tgt_bin_desc{ 4358 // int32_t NumDevices; // Number of devices supported. 4359 // __tgt_device_image *DeviceImages; // Arrays of device images 4360 // // (one per device). 4361 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4362 // // entries. 4363 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4364 // // entries (non inclusive). 
4365 // }; 4366 if (TgtBinaryDescriptorQTy.isNull()) { 4367 ASTContext &C = CGM.getContext(); 4368 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4369 RD->startDefinition(); 4370 addFieldToRecordDecl( 4371 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4372 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4373 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4374 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4375 RD->completeDefinition(); 4376 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4377 } 4378 return TgtBinaryDescriptorQTy; 4379 } 4380 4381 namespace { 4382 struct PrivateHelpersTy { 4383 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4384 const VarDecl *PrivateElemInit) 4385 : Original(Original), PrivateCopy(PrivateCopy), 4386 PrivateElemInit(PrivateElemInit) {} 4387 const VarDecl *Original; 4388 const VarDecl *PrivateCopy; 4389 const VarDecl *PrivateElemInit; 4390 }; 4391 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4392 } // anonymous namespace 4393 4394 static RecordDecl * 4395 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4396 if (!Privates.empty()) { 4397 ASTContext &C = CGM.getContext(); 4398 // Build struct .kmp_privates_t. 
{ 4399 // /* private vars */ 4400 // }; 4401 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4402 RD->startDefinition(); 4403 for (const auto &Pair : Privates) { 4404 const VarDecl *VD = Pair.second.Original; 4405 QualType Type = VD->getType().getNonReferenceType(); 4406 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4407 if (VD->hasAttrs()) { 4408 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4409 E(VD->getAttrs().end()); 4410 I != E; ++I) 4411 FD->addAttr(*I); 4412 } 4413 } 4414 RD->completeDefinition(); 4415 return RD; 4416 } 4417 return nullptr; 4418 } 4419 4420 static RecordDecl * 4421 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4422 QualType KmpInt32Ty, 4423 QualType KmpRoutineEntryPointerQTy) { 4424 ASTContext &C = CGM.getContext(); 4425 // Build struct kmp_task_t { 4426 // void * shareds; 4427 // kmp_routine_entry_t routine; 4428 // kmp_int32 part_id; 4429 // kmp_cmplrdata_t data1; 4430 // kmp_cmplrdata_t data2; 4431 // For taskloops additional fields: 4432 // kmp_uint64 lb; 4433 // kmp_uint64 ub; 4434 // kmp_int64 st; 4435 // kmp_int32 liter; 4436 // void * reductions; 4437 // }; 4438 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4439 UD->startDefinition(); 4440 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4441 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4442 UD->completeDefinition(); 4443 QualType KmpCmplrdataTy = C.getRecordType(UD); 4444 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4445 RD->startDefinition(); 4446 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4447 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4448 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4449 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4450 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4451 if (isOpenMPTaskLoopDirective(Kind)) { 4452 QualType KmpUInt64Ty = 4453 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4454 QualType KmpInt64Ty = 4455 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4456 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4457 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4458 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4459 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4460 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4461 } 4462 RD->completeDefinition(); 4463 return RD; 4464 } 4465 4466 static RecordDecl * 4467 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4468 ArrayRef<PrivateDataTy> Privates) { 4469 ASTContext &C = CGM.getContext(); 4470 // Build struct kmp_task_t_with_privates { 4471 // kmp_task_t task_data; 4472 // .kmp_privates_t. privates; 4473 // }; 4474 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4475 RD->startDefinition(); 4476 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4477 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4478 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4479 RD->completeDefinition(); 4480 return RD; 4481 } 4482 4483 /// Emit a proxy function which accepts kmp_task_t as the second 4484 /// argument. 
4485 /// \code 4486 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4487 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4488 /// For taskloops: 4489 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4490 /// tt->reductions, tt->shareds); 4491 /// return 0; 4492 /// } 4493 /// \endcode 4494 static llvm::Function * 4495 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4496 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4497 QualType KmpTaskTWithPrivatesPtrQTy, 4498 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4499 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4500 llvm::Value *TaskPrivatesMap) { 4501 ASTContext &C = CGM.getContext(); 4502 FunctionArgList Args; 4503 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4504 ImplicitParamDecl::Other); 4505 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4506 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4507 ImplicitParamDecl::Other); 4508 Args.push_back(&GtidArg); 4509 Args.push_back(&TaskTypeArg); 4510 const auto &TaskEntryFnInfo = 4511 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4512 llvm::FunctionType *TaskEntryTy = 4513 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4514 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4515 auto *TaskEntry = llvm::Function::Create( 4516 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4517 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4518 TaskEntry->setDoesNotRecurse(); 4519 CodeGenFunction CGF(CGM); 4520 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4521 Loc, Loc); 4522 4523 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4524 // tt, 4525 // For taskloops: 4526 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4527 // 
tt->task_data.shareds); 4528 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4529 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4530 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4531 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4532 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4533 const auto *KmpTaskTWithPrivatesQTyRD = 4534 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4535 LValue Base = 4536 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4537 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4538 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4539 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4540 llvm::Value *PartidParam = PartIdLVal.getPointer(); 4541 4542 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4543 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4544 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4545 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4546 CGF.ConvertTypeForMem(SharedsPtrTy)); 4547 4548 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4549 llvm::Value *PrivatesParam; 4550 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4551 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4552 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4553 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 4554 } else { 4555 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4556 } 4557 4558 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4559 TaskPrivatesMap, 4560 CGF.Builder 4561 .CreatePointerBitCastOrAddrSpaceCast( 4562 TDBase.getAddress(), CGF.VoidPtrTy) 4563 .getPointer()}; 4564 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4565 std::end(CommonArgs)); 4566 if (isOpenMPTaskLoopDirective(Kind)) { 4567 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), 
KmpTaskTLowerBound); 4568 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4569 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4570 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4571 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4572 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4573 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4574 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4575 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4576 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4577 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4578 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4579 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4580 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4581 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4582 CallArgs.push_back(LBParam); 4583 CallArgs.push_back(UBParam); 4584 CallArgs.push_back(StParam); 4585 CallArgs.push_back(LIParam); 4586 CallArgs.push_back(RParam); 4587 } 4588 CallArgs.push_back(SharedsParam); 4589 4590 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4591 CallArgs); 4592 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4593 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4594 CGF.FinishFunction(); 4595 return TaskEntry; 4596 } 4597 4598 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4599 SourceLocation Loc, 4600 QualType KmpInt32Ty, 4601 QualType KmpTaskTWithPrivatesPtrQTy, 4602 QualType KmpTaskTWithPrivatesQTy) { 4603 ASTContext &C = CGM.getContext(); 4604 FunctionArgList Args; 4605 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4606 ImplicitParamDecl::Other); 4607 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4608 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4609 ImplicitParamDecl::Other); 4610 
Args.push_back(&GtidArg); 4611 Args.push_back(&TaskTypeArg); 4612 const auto &DestructorFnInfo = 4613 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4614 llvm::FunctionType *DestructorFnTy = 4615 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4616 std::string Name = 4617 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4618 auto *DestructorFn = 4619 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4620 Name, &CGM.getModule()); 4621 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4622 DestructorFnInfo); 4623 DestructorFn->setDoesNotRecurse(); 4624 CodeGenFunction CGF(CGM); 4625 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4626 Args, Loc, Loc); 4627 4628 LValue Base = CGF.EmitLoadOfPointerLValue( 4629 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4630 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4631 const auto *KmpTaskTWithPrivatesQTyRD = 4632 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4633 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4634 Base = CGF.EmitLValueForField(Base, *FI); 4635 for (const auto *Field : 4636 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4637 if (QualType::DestructionKind DtorKind = 4638 Field->getType().isDestructedType()) { 4639 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4640 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4641 } 4642 } 4643 CGF.FinishFunction(); 4644 return DestructorFn; 4645 } 4646 4647 /// Emit a privates mapping function for correct handling of private and 4648 /// firstprivate variables. 4649 /// \code 4650 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4651 /// **noalias priv1,..., <tyn> **noalias privn) { 4652 /// *priv1 = &.privates.priv1; 4653 /// ...; 4654 /// *privn = &.privates.privn; 4655 /// } 4656 /// \endcode 4657 static llvm::Value * 4658 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4659 ArrayRef<const Expr *> PrivateVars, 4660 ArrayRef<const Expr *> FirstprivateVars, 4661 ArrayRef<const Expr *> LastprivateVars, 4662 QualType PrivatesQTy, 4663 ArrayRef<PrivateDataTy> Privates) { 4664 ASTContext &C = CGM.getContext(); 4665 FunctionArgList Args; 4666 ImplicitParamDecl TaskPrivatesArg( 4667 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4668 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4669 ImplicitParamDecl::Other); 4670 Args.push_back(&TaskPrivatesArg); 4671 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4672 unsigned Counter = 1; 4673 for (const Expr *E : PrivateVars) { 4674 Args.push_back(ImplicitParamDecl::Create( 4675 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4676 C.getPointerType(C.getPointerType(E->getType())) 4677 .withConst() 4678 .withRestrict(), 4679 ImplicitParamDecl::Other)); 4680 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4681 PrivateVarsPos[VD] = Counter; 4682 ++Counter; 4683 } 4684 for (const Expr *E : FirstprivateVars) { 4685 Args.push_back(ImplicitParamDecl::Create( 4686 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4687 C.getPointerType(C.getPointerType(E->getType())) 4688 .withConst() 4689 .withRestrict(), 4690 ImplicitParamDecl::Other)); 4691 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4692 PrivateVarsPos[VD] = Counter; 4693 ++Counter; 4694 } 4695 for (const Expr *E : LastprivateVars) { 4696 Args.push_back(ImplicitParamDecl::Create( 4697 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4698 C.getPointerType(C.getPointerType(E->getType())) 4699 .withConst() 4700 .withRestrict(), 4701 ImplicitParamDecl::Other)); 4702 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4703 
PrivateVarsPos[VD] = Counter; 4704 ++Counter; 4705 } 4706 const auto &TaskPrivatesMapFnInfo = 4707 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4708 llvm::FunctionType *TaskPrivatesMapTy = 4709 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4710 std::string Name = 4711 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4712 auto *TaskPrivatesMap = llvm::Function::Create( 4713 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4714 &CGM.getModule()); 4715 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4716 TaskPrivatesMapFnInfo); 4717 if (CGM.getLangOpts().Optimize) { 4718 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4719 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4720 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4721 } 4722 CodeGenFunction CGF(CGM); 4723 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4724 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4725 4726 // *privi = &.privates.privi; 4727 LValue Base = CGF.EmitLoadOfPointerLValue( 4728 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4729 TaskPrivatesArg.getType()->castAs<PointerType>()); 4730 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4731 Counter = 0; 4732 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4733 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4734 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4735 LValue RefLVal = 4736 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4737 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4738 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4739 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4740 ++Counter; 4741 } 4742 CGF.FinishFunction(); 4743 return TaskPrivatesMap; 4744 } 4745 4746 /// Emit initialization for private variables in task-based directives. 
/// Walks \p Privates in step with the fields of the privates record (the
/// second field of \p KmpTaskTWithPrivatesQTyRD) and emits the initializer of
/// each private copy into the matching field.
/// \param KmpTaskSharedsPtr Address of the shareds block; used as the source
/// for firstprivate copies when it is needed (see the condition below).
/// \param TDBase LValue of the record described by
/// \p KmpTaskTWithPrivatesQTyRD.
/// \param ForDup If true, only initializers that are non-trivial constructor
/// calls are emitted; all other initializers are skipped.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of the record holds the privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
  // PointersArray and SizesArray. The original variables for these arrays are
  // not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // From here on FI iterates over the fields of the privates record itself.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // In dup mode only non-trivial constructor initializers are emitted.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // PrivateElemInit is only set for firstprivate entries.
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray or SizesArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          // Not captured: take the address of the local variable directly.
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else {
          // Captured: read it from the shareds record and re-wrap the pointer
          // with the declared alignment of the original variable.
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: map the helper variable Elem to the
          // original's address, then emit the initializer.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
            return SharedRefLValue.getAddress();
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // No element-init helper: the initializer is emitted directly.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    // Advance even when nothing was emitted so FI stays aligned with Privates.
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
4852 static bool checkInitIsRequired(CodeGenFunction &CGF, 4853 ArrayRef<PrivateDataTy> Privates) { 4854 bool InitRequired = false; 4855 for (const PrivateDataTy &Pair : Privates) { 4856 const VarDecl *VD = Pair.second.PrivateCopy; 4857 const Expr *Init = VD->getAnyInitializer(); 4858 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4859 !CGF.isTrivialInitializer(Init)); 4860 if (InitRequired) 4861 break; 4862 } 4863 return InitRequired; 4864 } 4865 4866 4867 /// Emit task_dup function (for initialization of 4868 /// private/firstprivate/lastprivate vars and last_iter flag) 4869 /// \code 4870 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4871 /// lastpriv) { 4872 /// // setup lastprivate flag 4873 /// task_dst->last = lastpriv; 4874 /// // could be constructor calls here... 4875 /// } 4876 /// \endcode 4877 static llvm::Value * 4878 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4879 const OMPExecutableDirective &D, 4880 QualType KmpTaskTWithPrivatesPtrQTy, 4881 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4882 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4883 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4884 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4885 ASTContext &C = CGM.getContext(); 4886 FunctionArgList Args; 4887 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4888 KmpTaskTWithPrivatesPtrQTy, 4889 ImplicitParamDecl::Other); 4890 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4891 KmpTaskTWithPrivatesPtrQTy, 4892 ImplicitParamDecl::Other); 4893 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4894 ImplicitParamDecl::Other); 4895 Args.push_back(&DstArg); 4896 Args.push_back(&SrcArg); 4897 Args.push_back(&LastprivArg); 4898 const auto &TaskDupFnInfo = 4899 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4900 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4901 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4902 auto *TaskDup = llvm::Function::Create( 4903 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4904 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4905 TaskDup->setDoesNotRecurse(); 4906 CodeGenFunction CGF(CGM); 4907 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4908 Loc); 4909 4910 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4911 CGF.GetAddrOfLocalVar(&DstArg), 4912 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4913 // task_dst->liter = lastpriv; 4914 if (WithLastIter) { 4915 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4916 LValue Base = CGF.EmitLValueForField( 4917 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4918 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4919 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4920 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4921 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4922 } 4923 4924 // Emit initial values for private copies (if any). 4925 assert(!Privates.empty()); 4926 Address KmpTaskSharedsPtr = Address::invalid(); 4927 if (!Data.FirstprivateVars.empty()) { 4928 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4929 CGF.GetAddrOfLocalVar(&SrcArg), 4930 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4931 LValue Base = CGF.EmitLValueForField( 4932 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4933 KmpTaskSharedsPtr = Address( 4934 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4935 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4936 KmpTaskTShareds)), 4937 Loc), 4938 CGF.getNaturalTypeAlignment(SharedsTy)); 4939 } 4940 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4941 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4942 CGF.FinishFunction(); 4943 return TaskDup; 4944 } 4945 4946 /// Checks if destructor function is required to be generated. 
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
  bool NeedsCleanup = false;
  // The privates record is the second field of the task-with-privates record.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
  // Cleanups are needed as soon as one private field has a destructed type.
  for (const FieldDecl *FD : PrivateRD->fields()) {
    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
    if (NeedsCleanup)
      break;
  }
  return NeedsCleanup;
}

/// Allocates and fills the runtime task descriptor for directive \p D.
/// In order: collects the private/firstprivate/lastprivate copies, builds the
/// kmp_task_t record types, emits the privates-mapping and proxy entry
/// functions, calls __kmpc_omp_task_alloc (or __kmpc_omp_target_task_alloc
/// when \p D has a 'nowait' clause), copies the shareds, initializes the
/// privates, and stores the destructors thunk and the priority if required.
/// \return The values later needed to enqueue the task: the allocated task,
/// the proxy entry, the casted task pointer, the kmp_task_t lvalue, its record
/// declaration and (for taskloops that need it) the task_dup function.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  auto I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  auto IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Largest alignment first; the stable sort keeps the clause order among
  // entries of equal alignment.
  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
    return L.first > R.first;
  });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  // Taskloop and non-taskloop directives cache separate record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  // The mapping function is cast to the type of the 4th parameter of the
  // outlined task function.
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap = emitTaskPrivateMappingFunction(
        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
        FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  // The 'final' flag is either a runtime value (select on the condition) or a
  // compile-time constant; the remaining flags are OR-ed in afterwards.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  // With a 'nowait' clause the target-task allocator is used; it takes the
  // device id as an extra trailing argument.
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs);
  } else {
    NewTask = CGF.EmitRuntimeCall(
        createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs);
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  // Base is the whole task-with-privates record; TDBase is its first field,
  // the kmp_task_t part.
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr =
        Address(CGF.EmitLoadOfScalar(
                    CGF.EmitLValueForField(
                        TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
                                           KmpTaskTShareds)),
                    Loc),
                CGF.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // A task_dup function is only emitted for taskloops that have
    // lastprivates or privates with non-trivial constructor initializers.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

/// Emits the code that creates and launches a task for directive \p D.
/// The task is first built with emitTaskInit. If dependences are present, a
/// kmp_depend_info array is filled in. The task is then enqueued with
/// __kmpc_omp_task / __kmpc_omp_task_with_deps. When \p IfCond is given, that
/// enqueue path is emitted as the "then" branch, and the "else" branch waits
/// for the dependences (if any) and calls the task entry directly between
/// __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  ASTContext &C = CGM.getContext();
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Data.Dependences.size();
  if (NumDependencies) {
    // Dependence kind for RTL.
    enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
    // Field indices of the kmp_depend_info record built below.
    enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
    RecordDecl *KmpDependInfoRD;
    QualType FlagsTy =
        C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
    llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
    // The record type is built once and cached in KmpDependInfoTy.
    if (KmpDependInfoTy.isNull()) {
      KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
      KmpDependInfoRD->startDefinition();
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
      addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
      addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
      KmpDependInfoRD->completeDefinition();
      KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
    } else {
      KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
    }
    // Define type kmp_depend_info[<Dependences.size()>];
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    // kmp_depend_info[<Dependences.size()>] deps;
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    for (unsigned I = 0; I < NumDependencies; ++I) {
      const Expr *E = Data.Dependences[I].second;
      LValue Addr = CGF.EmitLValue(E);
      llvm::Value *Size;
      QualType Ty = E->getType();
      if (const auto *ASE =
              dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
        // Array section: the size is the byte distance from the first element
        // to one past the last element of the section.
        LValue UpAddrLVal =
            CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
        llvm::Value *UpAddr =
            CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
        llvm::Value *LowIntPtr =
            CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
        llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
        Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
      } else {
        Size = CGF.getTypeSize(Ty);
      }
      LValue Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
          KmpDependInfoTy);
      // deps[i].base_addr = &<Dependences[i].second>;
      LValue BaseAddrLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
      CGF.EmitStoreOfScalar(
          CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
          BaseAddrLVal);
      // deps[i].len = sizeof(<Dependences[i].second>);
      LValue LenLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Len));
      CGF.EmitStoreOfScalar(Size, LenLVal);
      // deps[i].flags = <Dependences[i].first>;
      RTLDependenceKindTy DepKind;
      switch (Data.Dependences[I].first) {
      case OMPC_DEPEND_in:
        DepKind = DepIn;
        break;
      // Out and InOut dependencies must use the same code.
      case OMPC_DEPEND_out:
      case OMPC_DEPEND_inout:
        DepKind = DepInOut;
        break;
      case OMPC_DEPEND_mutexinoutset:
        DepKind = DepMutexInOutSet;
        break;
      case OMPC_DEPEND_source:
      case OMPC_DEPEND_sink:
      case OMPC_DEPEND_unknown:
        llvm_unreachable("Unknown task dependence type");
      }
      LValue FlagsLVal = CGF.EmitLValueForField(
          Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                            FlagsLVal);
    }
    // Decay the array to a void pointer to its first element for the
    // runtime calls.
    DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
  }

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Only filled in (and only read) when there are dependences.
  llvm::Value *DepTaskArgs[7];
  if (NumDependencies) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Then" path: hand the task over to the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
                        &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks the part_id field is reset to 0 before enqueueing.
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (NumDependencies) {
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Only filled in (and only read) when there are dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (NumDependencies) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // "Else" path (if-clause is false): run the task body in place.
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // Without an if-clause only the enqueue path is emitted.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emits the code that creates and launches a taskloop for directive \p D.
/// The task is built with emitTaskInit, the lower bound, upper bound, stride
/// and reductions fields of the task record are filled in, and the task is
/// handed to __kmpc_taskloop.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: the evaluated if-clause condition, or constant 1 when absent.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Store the initializers of the loop's lower bound, upper bound and stride
  // variables into the corresponding fields of the task record.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values of the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: Data.Schedule's flag selects num_tasks over grainsize.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
5466 static void EmitOMPAggregateReduction( 5467 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5468 const VarDecl *RHSVar, 5469 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5470 const Expr *, const Expr *)> &RedOpGen, 5471 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5472 const Expr *UpExpr = nullptr) { 5473 // Perform element-by-element initialization. 5474 QualType ElementTy; 5475 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5476 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5477 5478 // Drill down to the base element type on both arrays. 5479 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5480 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5481 5482 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5483 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5484 // Cast from pointer to array type to pointer to single element. 5485 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5486 // The basic structure here is a while-do loop. 5487 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5488 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5489 llvm::Value *IsEmpty = 5490 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5491 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5492 5493 // Enter the loop body, making that address the current address. 
5494 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5495 CGF.EmitBlock(BodyBB); 5496 5497 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5498 5499 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5500 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5501 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5502 Address RHSElementCurrent = 5503 Address(RHSElementPHI, 5504 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5505 5506 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5507 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5508 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5509 Address LHSElementCurrent = 5510 Address(LHSElementPHI, 5511 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5512 5513 // Emit copy. 5514 CodeGenFunction::OMPPrivateScope Scope(CGF); 5515 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5516 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5517 Scope.Privatize(); 5518 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5519 Scope.ForceCleanup(); 5520 5521 // Shift the address forward by one element. 5522 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5523 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5524 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5525 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5526 // Check whether we've reached the end. 5527 llvm::Value *Done = 5528 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5529 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5530 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5531 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5532 5533 // Done. 5534 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5535 } 5536 5537 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
/// \param CGF Code generation state the combiner is emitted into.
/// \param ReductionOp Reduction operation expression to emit.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize the shape call(opaque-value -> decl-ref -> declare reduction).
  // In that case the opaque callee is bound to the first function returned by
  // getUserDefinedReduction() while the call is emitted.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  // Any other expression is emitted as is.
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the internal-linkage helper
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg);
/// \endcode
/// Both arguments are cast to \p ArgsType and indexed as arrays of pointers to
/// the reduction items; the body applies every operation in \p ReductionOps.
/// \param Loc Source location used for the emitted function.
/// \param ArgsType LLVM type the two void* arguments are cast to.
/// \param Privates Private copies; only their types are inspected here.
/// \param LHSExprs DeclRefExprs of the LHS helper variables.
/// \param RHSExprs DeclRefExprs of the RHS helper variables.
/// \param ReductionOps Reduction operations, one per reduction item.
/// \return The emitted function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the pointer arrays. It runs ahead of I because every
  // variably modified item occupies one extra slot that carries its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    // Idx is captured by value: it is incremented again below.
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type. The size is read from the slot
      // following the item in the LHS array, where it is stored as a pointer
      // sized value, and bound to the VLA size expression.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the combiners with the helper variables remapped.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner for a single reduction item: element by element when
/// the private copy has array type, otherwise as a single combiner.
/// \param CGF Code generation state to emit into.
/// \param ReductionOp Reduction operation expression.
/// \param PrivateRef Private copy; only its type is inspected.
/// \param LHS Reference to the LHS helper variable.
/// \param RHS Reference to the RHS helper variable.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code that combines the private copies of the reduction items,
/// either inline (Options.SimpleReduction) or through
/// __kmpc_reduce{_nowait} with a switch over its result. Does nothing when
/// there is no insert point.
/// \param CGF Code generation state to emit into.
/// \param Loc Source location used for runtime calls.
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs DeclRefExprs of the LHS helper variables.
/// \param RHSExprs DeclRefExprs of the RHS helper variables.
/// \param ReductionOps Reduction operations, one per reduction item.
/// \param Options WithNowait selects the _nowait runtime entry points,
/// SimpleReduction requests the inline-only form.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it advances twice for variably modified items
  // (address slot followed by size slot).
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // Note: this local Size (number of VLA elements) shadows the outer Size
      // (number of RedList slots).
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // The action carries the matching __kmpc_end_reduce{_nowait} call with
  // EndArgs.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // XExpr/UpExpr are set only for operations of the form 'x = <update>';
      // EExpr and BO are refined from the update expression below.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: the pointer declared in the condition shadows the opcode BO
      // above for the extent of this if statement only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback evaluates UpExpr with VD remapped to a temporary
          // holding the value passed in as XRValue.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    // No end-reduce action is attached for the nowait variant of case 2.
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
/// \param CGM Module used for name mangling and for the runtime's getName().
/// \param Prefix Text placed in front of the generated name.
/// \param Ref Reference expression of the reduction item.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  // Fall back to treating Ref itself as a reference to a variable.
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Local variables and parameters use their plain name, everything else the
  // mangled name.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return Out.str();
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the emitted function.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \return The emitted function.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable. The "reduction_size" name is the same one
  // emitTaskReductionFixups() stores to.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer). Otherwise a null pointer is passed.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the emitted function.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \param ReductionOp Reduction operation expression.
/// \param LHS DeclRefExpr of the LHS helper variable (bound to %arg0).
/// \param RHS DeclRefExpr of the RHS helper variable (bound to %arg1).
/// \param PrivateRef Private copy; its type selects array or scalar emission.
/// \return The emitted function.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param CGM Module the function is added to.
/// \param Loc Source location used for the emitted function.
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
/// \return The emitted function, or nullptr when item \p N needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}

/// Builds a local array of kmp_task_red_input_t records, one per entry of
/// Data.ReductionVars, and passes it to __kmpc_task_reduction_init.
/// \param CGF Code generation state to emit into.
/// \param Loc Source location used for runtime calls and helper functions.
/// \param LHSExprs LHS helper variables, indexed like Data.ReductionVars.
/// \param RHSExprs RHS helper variables, indexed like Data.ReductionVars.
/// \param Data Task data; ReductionVars, ReductionCopies and ReductionOps are
/// read.
/// \return Result of the runtime call, or nullptr when there is no insert
/// point or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_size = <size of the item in chars>;
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini; (null when the item needs no cleanups)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

/// Stores the values needed by the init/comb/fini helper functions of
/// reduction item \p N into artificial threadprivate variables: the size when
/// it is non-constant, and the address of the original item when a custom
/// initializer is used.
/// \param CGF Code generation state to emit into.
/// \param Loc Source location (not used by this function's body).
/// \param RCG Reduction codegen helper describing the reduction items.
/// \param N Index of the reduction item within \p RCG.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Emits a call to __kmpc_task_reduction_get_th_data for the given shared
/// item.
/// \param CGF Code generation state to emit into.
/// \param Loc Source location used to obtain the thread id.
/// \param ReductionsPtr Passed as the runtime's 'tg' argument.
/// \param SharedLVal Shared reduction item; its pointer is passed as 'd'.
/// \return Address made of the call result and the alignment of \p SharedLVal.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emits a call to __kmpc_omp_taskwait and, when inside an OpenMP region,
/// lets the region emit its untied-task switch. Does nothing when there is no
/// insert point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the body produced by \p CodeGen inline, inside a temporary inlined
/// OpenMP region of kind \p InnerKind. Does nothing when there is no insert
/// point.
/// \param HasCancel Forwarded to the inlined region.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // The RAII object is what makes CGF.CapturedStmtInfo usable on the next
  // line; it must stay alive until EmitBody returns.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Cancellation kinds as integer codes; getCancellationKind() maps directive
/// kinds onto them.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the directive kind of a cancel region to its RTCancelKind code.
/// Anything other than parallel, for and sections is asserted to be taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
6395 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6396 llvm::Value *Args[] = { 6397 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6398 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6399 // Ignore return result until untied tasks are supported. 6400 llvm::Value *Result = CGF.EmitRuntimeCall( 6401 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6402 // if (__kmpc_cancellationpoint()) { 6403 // exit from construct; 6404 // } 6405 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6406 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6407 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6408 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6409 CGF.EmitBlock(ExitBB); 6410 // exit from construct; 6411 CodeGenFunction::JumpDest CancelDest = 6412 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6413 CGF.EmitBranchThroughCleanup(CancelDest); 6414 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6415 } 6416 } 6417 } 6418 6419 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6420 const Expr *IfCond, 6421 OpenMPDirectiveKind CancelRegion) { 6422 if (!CGF.HaveInsertPoint()) 6423 return; 6424 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6425 // kmp_int32 cncl_kind); 6426 if (auto *OMPRegionInfo = 6427 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6428 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6429 PrePostActionTy &) { 6430 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6431 llvm::Value *Args[] = { 6432 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6433 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6434 // Ignore return result until untied tasks are supported. 
6435 llvm::Value *Result = CGF.EmitRuntimeCall( 6436 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6437 // if (__kmpc_cancel()) { 6438 // exit from construct; 6439 // } 6440 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6441 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6442 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6443 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6444 CGF.EmitBlock(ExitBB); 6445 // exit from construct; 6446 CodeGenFunction::JumpDest CancelDest = 6447 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6448 CGF.EmitBranchThroughCleanup(CancelDest); 6449 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6450 }; 6451 if (IfCond) { 6452 emitOMPIfClause(CGF, IfCond, ThenGen, 6453 [](CodeGenFunction &, PrePostActionTy &) {}); 6454 } else { 6455 RegionCodeGenTy ThenRCG(ThenGen); 6456 ThenRCG(CGF); 6457 } 6458 } 6459 } 6460 6461 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6462 const OMPExecutableDirective &D, StringRef ParentName, 6463 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6464 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6465 assert(!ParentName.empty() && "Invalid target region parent name!"); 6466 HasEmittedTargetRegion = true; 6467 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6468 IsOffloadEntry, CodeGen); 6469 } 6470 6471 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6472 const OMPExecutableDirective &D, StringRef ParentName, 6473 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6474 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6475 // Create a unique name for the entry function using the source location 6476 // information of the current target region. 
The name will be something like: 6477 // 6478 // __omp_offloading_DD_FFFF_PP_lBB 6479 // 6480 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6481 // mangled name of the function that encloses the target region and BB is the 6482 // line number of the target region. 6483 6484 unsigned DeviceID; 6485 unsigned FileID; 6486 unsigned Line; 6487 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6488 Line); 6489 SmallString<64> EntryFnName; 6490 { 6491 llvm::raw_svector_ostream OS(EntryFnName); 6492 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6493 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6494 } 6495 6496 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6497 6498 CodeGenFunction CGF(CGM, true); 6499 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6500 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6501 6502 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 6503 6504 // If this target outline function is not an offload entry, we don't need to 6505 // register it. 6506 if (!IsOffloadEntry) 6507 return; 6508 6509 // The target region ID is used by the runtime library to identify the current 6510 // target region, so it only has to be unique and not necessarily point to 6511 // anything. It could be the pointer to the outlined function that implements 6512 // the target region, but we aren't using that so that the compiler doesn't 6513 // need to keep that, and could therefore inline the host function if proven 6514 // worthwhile during optimization. In the other hand, if emitting code for the 6515 // device, the ID has to be the function address so that it can retrieved from 6516 // the offloading entry and launched by the runtime library. We also mark the 6517 // outlined function to have external linkage in case we are emitting code for 6518 // the device, because these functions will be entry points to the device. 
6519 6520 if (CGM.getLangOpts().OpenMPIsDevice) { 6521 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6522 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6523 OutlinedFn->setDSOLocal(false); 6524 } else { 6525 std::string Name = getName({EntryFnName, "region_id"}); 6526 OutlinedFnID = new llvm::GlobalVariable( 6527 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6528 llvm::GlobalValue::WeakAnyLinkage, 6529 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6530 } 6531 6532 // Register the information for the entry associated with this target region. 6533 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6534 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6535 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6536 } 6537 6538 /// Checks if the expression is constant or does not have non-trivial function 6539 /// calls. 6540 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6541 // We can skip constant expressions. 6542 // We can skip expressions with trivial calls or simple expressions. 6543 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6544 !E->hasNonTrivialCall(Ctx)) && 6545 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6546 } 6547 6548 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6549 const Stmt *Body) { 6550 const Stmt *Child = Body->IgnoreContainers(); 6551 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6552 Child = nullptr; 6553 for (const Stmt *S : C->body()) { 6554 if (const auto *E = dyn_cast<Expr>(S)) { 6555 if (isTrivial(Ctx, E)) 6556 continue; 6557 } 6558 // Some of the statements can be ignored. 6559 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6560 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6561 continue; 6562 // Analyze declarations. 
6563 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6564 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6565 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6566 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6567 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6568 isa<UsingDirectiveDecl>(D) || 6569 isa<OMPDeclareReductionDecl>(D) || 6570 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6571 return true; 6572 const auto *VD = dyn_cast<VarDecl>(D); 6573 if (!VD) 6574 return false; 6575 return VD->isConstexpr() || 6576 ((VD->getType().isTrivialType(Ctx) || 6577 VD->getType()->isReferenceType()) && 6578 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6579 })) 6580 continue; 6581 } 6582 // Found multiple children - cannot get the one child only. 6583 if (Child) 6584 return nullptr; 6585 Child = S; 6586 } 6587 if (Child) 6588 Child = Child->IgnoreContainers(); 6589 } 6590 return Child; 6591 } 6592 6593 /// Emit the number of teams for a target directive. Inspect the num_teams 6594 /// clause associated with a teams construct combined or closely nested 6595 /// with the target directive. 6596 /// 6597 /// Emit a team of size one for directives such as 'target parallel' that 6598 /// have no associated teams construct. 6599 /// 6600 /// Otherwise, return nullptr. 
6601 static llvm::Value * 6602 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6603 const OMPExecutableDirective &D) { 6604 assert(!CGF.getLangOpts().OpenMPIsDevice && 6605 "Clauses associated with the teams directive expected to be emitted " 6606 "only for the host!"); 6607 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6608 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6609 "Expected target-based executable directive."); 6610 CGBuilderTy &Bld = CGF.Builder; 6611 switch (DirectiveKind) { 6612 case OMPD_target: { 6613 const auto *CS = D.getInnermostCapturedStmt(); 6614 const auto *Body = 6615 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6616 const Stmt *ChildStmt = 6617 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6618 if (const auto *NestedDir = 6619 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6620 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6621 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6622 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6623 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6624 const Expr *NumTeams = 6625 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6626 llvm::Value *NumTeamsVal = 6627 CGF.EmitScalarExpr(NumTeams, 6628 /*IgnoreResultAssign*/ true); 6629 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6630 /*isSigned=*/true); 6631 } 6632 return Bld.getInt32(0); 6633 } 6634 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6635 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6636 return Bld.getInt32(1); 6637 return Bld.getInt32(0); 6638 } 6639 return nullptr; 6640 } 6641 case OMPD_target_teams: 6642 case OMPD_target_teams_distribute: 6643 case OMPD_target_teams_distribute_simd: 6644 case OMPD_target_teams_distribute_parallel_for: 6645 case OMPD_target_teams_distribute_parallel_for_simd: { 6646 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6647 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6648 const Expr *NumTeams = 6649 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6650 llvm::Value *NumTeamsVal = 6651 CGF.EmitScalarExpr(NumTeams, 6652 /*IgnoreResultAssign*/ true); 6653 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6654 /*isSigned=*/true); 6655 } 6656 return Bld.getInt32(0); 6657 } 6658 case OMPD_target_parallel: 6659 case OMPD_target_parallel_for: 6660 case OMPD_target_parallel_for_simd: 6661 case OMPD_target_simd: 6662 return Bld.getInt32(1); 6663 case OMPD_parallel: 6664 case OMPD_for: 6665 case OMPD_parallel_for: 6666 case OMPD_parallel_sections: 6667 case OMPD_for_simd: 6668 case OMPD_parallel_for_simd: 6669 case OMPD_cancel: 6670 case OMPD_cancellation_point: 6671 case OMPD_ordered: 6672 case OMPD_threadprivate: 6673 case OMPD_allocate: 6674 case OMPD_task: 6675 case OMPD_simd: 6676 case OMPD_sections: 6677 case OMPD_section: 6678 case OMPD_single: 6679 case OMPD_master: 6680 case OMPD_critical: 6681 case OMPD_taskyield: 6682 case OMPD_barrier: 6683 case OMPD_taskwait: 6684 case OMPD_taskgroup: 6685 case OMPD_atomic: 6686 case OMPD_flush: 6687 case OMPD_teams: 6688 case OMPD_target_data: 6689 case OMPD_target_exit_data: 6690 case OMPD_target_enter_data: 6691 case OMPD_distribute: 6692 case OMPD_distribute_simd: 6693 case OMPD_distribute_parallel_for: 6694 case OMPD_distribute_parallel_for_simd: 6695 case OMPD_teams_distribute: 6696 case OMPD_teams_distribute_simd: 6697 case OMPD_teams_distribute_parallel_for: 6698 case OMPD_teams_distribute_parallel_for_simd: 6699 case OMPD_target_update: 6700 case OMPD_declare_simd: 6701 case OMPD_declare_variant: 6702 case OMPD_declare_target: 6703 case OMPD_end_declare_target: 6704 case OMPD_declare_reduction: 6705 case OMPD_declare_mapper: 6706 case OMPD_taskloop: 6707 case OMPD_taskloop_simd: 6708 case OMPD_master_taskloop: 6709 case OMPD_master_taskloop_simd: 6710 case OMPD_parallel_master_taskloop: 6711 case OMPD_parallel_master_taskloop_simd: 6712 case OMPD_requires: 
6713 case OMPD_unknown: 6714 break; 6715 } 6716 llvm_unreachable("Unexpected directive kind."); 6717 } 6718 6719 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, 6720 llvm::Value *DefaultThreadLimitVal) { 6721 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6722 CGF.getContext(), CS->getCapturedStmt()); 6723 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6724 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6725 llvm::Value *NumThreads = nullptr; 6726 llvm::Value *CondVal = nullptr; 6727 // Handle if clause. If if clause present, the number of threads is 6728 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6729 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6730 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6731 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6732 const OMPIfClause *IfClause = nullptr; 6733 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6734 if (C->getNameModifier() == OMPD_unknown || 6735 C->getNameModifier() == OMPD_parallel) { 6736 IfClause = C; 6737 break; 6738 } 6739 } 6740 if (IfClause) { 6741 const Expr *Cond = IfClause->getCondition(); 6742 bool Result; 6743 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6744 if (!Result) 6745 return CGF.Builder.getInt32(1); 6746 } else { 6747 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6748 if (const auto *PreInit = 6749 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6750 for (const auto *I : PreInit->decls()) { 6751 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6752 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6753 } else { 6754 CodeGenFunction::AutoVarEmission Emission = 6755 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6756 CGF.EmitAutoVarCleanups(Emission); 6757 } 6758 } 6759 } 6760 CondVal = CGF.EvaluateExprAsBool(Cond); 6761 } 6762 } 6763 } 6764 // Check the value of num_threads clause iff if clause was not specified 6765 // or is not evaluated to 
false. 6766 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6767 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6768 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6769 const auto *NumThreadsClause = 6770 Dir->getSingleClause<OMPNumThreadsClause>(); 6771 CodeGenFunction::LexicalScope Scope( 6772 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6773 if (const auto *PreInit = 6774 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6775 for (const auto *I : PreInit->decls()) { 6776 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6777 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6778 } else { 6779 CodeGenFunction::AutoVarEmission Emission = 6780 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6781 CGF.EmitAutoVarCleanups(Emission); 6782 } 6783 } 6784 } 6785 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6786 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6787 /*isSigned=*/false); 6788 if (DefaultThreadLimitVal) 6789 NumThreads = CGF.Builder.CreateSelect( 6790 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6791 DefaultThreadLimitVal, NumThreads); 6792 } else { 6793 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6794 : CGF.Builder.getInt32(0); 6795 } 6796 // Process condition of the if clause. 6797 if (CondVal) { 6798 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6799 CGF.Builder.getInt32(1)); 6800 } 6801 return NumThreads; 6802 } 6803 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6804 return CGF.Builder.getInt32(1); 6805 return DefaultThreadLimitVal; 6806 } 6807 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6808 : CGF.Builder.getInt32(0); 6809 } 6810 6811 /// Emit the number of threads for a target directive. Inspect the 6812 /// thread_limit clause associated with a teams construct combined or closely 6813 /// nested with the target directive. 
6814 /// 6815 /// Emit the num_threads clause for directives such as 'target parallel' that 6816 /// have no associated teams construct. 6817 /// 6818 /// Otherwise, return nullptr. 6819 static llvm::Value * 6820 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6821 const OMPExecutableDirective &D) { 6822 assert(!CGF.getLangOpts().OpenMPIsDevice && 6823 "Clauses associated with the teams directive expected to be emitted " 6824 "only for the host!"); 6825 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6826 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6827 "Expected target-based executable directive."); 6828 CGBuilderTy &Bld = CGF.Builder; 6829 llvm::Value *ThreadLimitVal = nullptr; 6830 llvm::Value *NumThreadsVal = nullptr; 6831 switch (DirectiveKind) { 6832 case OMPD_target: { 6833 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6834 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6835 return NumThreads; 6836 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6837 CGF.getContext(), CS->getCapturedStmt()); 6838 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6839 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6840 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6841 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6842 const auto *ThreadLimitClause = 6843 Dir->getSingleClause<OMPThreadLimitClause>(); 6844 CodeGenFunction::LexicalScope Scope( 6845 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6846 if (const auto *PreInit = 6847 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6848 for (const auto *I : PreInit->decls()) { 6849 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6850 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6851 } else { 6852 CodeGenFunction::AutoVarEmission Emission = 6853 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6854 CGF.EmitAutoVarCleanups(Emission); 6855 } 6856 } 6857 } 6858 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6859 
ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6860 ThreadLimitVal = 6861 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6862 } 6863 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6864 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6865 CS = Dir->getInnermostCapturedStmt(); 6866 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6867 CGF.getContext(), CS->getCapturedStmt()); 6868 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6869 } 6870 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6871 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6872 CS = Dir->getInnermostCapturedStmt(); 6873 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6874 return NumThreads; 6875 } 6876 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6877 return Bld.getInt32(1); 6878 } 6879 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6880 } 6881 case OMPD_target_teams: { 6882 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6883 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6884 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6885 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6886 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6887 ThreadLimitVal = 6888 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6889 } 6890 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6891 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6892 return NumThreads; 6893 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6894 CGF.getContext(), CS->getCapturedStmt()); 6895 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6896 if (Dir->getDirectiveKind() == OMPD_distribute) { 6897 CS = Dir->getInnermostCapturedStmt(); 6898 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6899 return NumThreads; 6900 } 6901 } 6902 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6903 } 6904 case OMPD_target_teams_distribute: 6905 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6906 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6907 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6908 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6909 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6910 ThreadLimitVal = 6911 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6912 } 6913 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6914 case OMPD_target_parallel: 6915 case OMPD_target_parallel_for: 6916 case OMPD_target_parallel_for_simd: 6917 case OMPD_target_teams_distribute_parallel_for: 6918 case OMPD_target_teams_distribute_parallel_for_simd: { 6919 llvm::Value *CondVal = nullptr; 6920 // Handle if clause. If if clause present, the number of threads is 6921 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6922 if (D.hasClausesOfKind<OMPIfClause>()) { 6923 const OMPIfClause *IfClause = nullptr; 6924 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6925 if (C->getNameModifier() == OMPD_unknown || 6926 C->getNameModifier() == OMPD_parallel) { 6927 IfClause = C; 6928 break; 6929 } 6930 } 6931 if (IfClause) { 6932 const Expr *Cond = IfClause->getCondition(); 6933 bool Result; 6934 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6935 if (!Result) 6936 return Bld.getInt32(1); 6937 } else { 6938 CodeGenFunction::RunCleanupsScope Scope(CGF); 6939 CondVal = CGF.EvaluateExprAsBool(Cond); 6940 } 6941 } 6942 } 6943 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6944 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6945 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6946 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6947 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6948 ThreadLimitVal = 6949 Bld.CreateIntCast(ThreadLimit, 
CGF.Int32Ty, /*isSigned=*/false); 6950 } 6951 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 6952 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 6953 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 6954 llvm::Value *NumThreads = CGF.EmitScalarExpr( 6955 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 6956 NumThreadsVal = 6957 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 6958 ThreadLimitVal = ThreadLimitVal 6959 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 6960 ThreadLimitVal), 6961 NumThreadsVal, ThreadLimitVal) 6962 : NumThreadsVal; 6963 } 6964 if (!ThreadLimitVal) 6965 ThreadLimitVal = Bld.getInt32(0); 6966 if (CondVal) 6967 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 6968 return ThreadLimitVal; 6969 } 6970 case OMPD_target_teams_distribute_simd: 6971 case OMPD_target_simd: 6972 return Bld.getInt32(1); 6973 case OMPD_parallel: 6974 case OMPD_for: 6975 case OMPD_parallel_for: 6976 case OMPD_parallel_sections: 6977 case OMPD_for_simd: 6978 case OMPD_parallel_for_simd: 6979 case OMPD_cancel: 6980 case OMPD_cancellation_point: 6981 case OMPD_ordered: 6982 case OMPD_threadprivate: 6983 case OMPD_allocate: 6984 case OMPD_task: 6985 case OMPD_simd: 6986 case OMPD_sections: 6987 case OMPD_section: 6988 case OMPD_single: 6989 case OMPD_master: 6990 case OMPD_critical: 6991 case OMPD_taskyield: 6992 case OMPD_barrier: 6993 case OMPD_taskwait: 6994 case OMPD_taskgroup: 6995 case OMPD_atomic: 6996 case OMPD_flush: 6997 case OMPD_teams: 6998 case OMPD_target_data: 6999 case OMPD_target_exit_data: 7000 case OMPD_target_enter_data: 7001 case OMPD_distribute: 7002 case OMPD_distribute_simd: 7003 case OMPD_distribute_parallel_for: 7004 case OMPD_distribute_parallel_for_simd: 7005 case OMPD_teams_distribute: 7006 case OMPD_teams_distribute_simd: 7007 case OMPD_teams_distribute_parallel_for: 7008 case OMPD_teams_distribute_parallel_for_simd: 7009 case OMPD_target_update: 7010 case 
OMPD_declare_simd: 7011 case OMPD_declare_variant: 7012 case OMPD_declare_target: 7013 case OMPD_end_declare_target: 7014 case OMPD_declare_reduction: 7015 case OMPD_declare_mapper: 7016 case OMPD_taskloop: 7017 case OMPD_taskloop_simd: 7018 case OMPD_master_taskloop: 7019 case OMPD_master_taskloop_simd: 7020 case OMPD_parallel_master_taskloop: 7021 case OMPD_parallel_master_taskloop_simd: 7022 case OMPD_requires: 7023 case OMPD_unknown: 7024 break; 7025 } 7026 llvm_unreachable("Unsupported directive kind."); 7027 } 7028 7029 namespace { 7030 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7031 7032 // Utility to handle information from clauses associated with a given 7033 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7034 // It provides a convenient interface to obtain the information and generate 7035 // code for that information. 7036 class MappableExprsHandler { 7037 public: 7038 /// Values for bit flags used to specify the mapping type for 7039 /// offloading. 7040 enum OpenMPOffloadMappingFlags : uint64_t { 7041 /// No flags 7042 OMP_MAP_NONE = 0x0, 7043 /// Allocate memory on the device and move data from host to device. 7044 OMP_MAP_TO = 0x01, 7045 /// Allocate memory on the device and move data from device to host. 7046 OMP_MAP_FROM = 0x02, 7047 /// Always perform the requested mapping action on the element, even 7048 /// if it was already mapped before. 7049 OMP_MAP_ALWAYS = 0x04, 7050 /// Delete the element from the device environment, ignoring the 7051 /// current reference count associated with the element. 7052 OMP_MAP_DELETE = 0x08, 7053 /// The element being mapped is a pointer-pointee pair; both the 7054 /// pointer and the pointee should be mapped. 7055 OMP_MAP_PTR_AND_OBJ = 0x10, 7056 /// This flags signals that the base address of an entry should be 7057 /// passed to the target kernel as an argument. 
7058 OMP_MAP_TARGET_PARAM = 0x20, 7059 /// Signal that the runtime library has to return the device pointer 7060 /// in the current position for the data being mapped. Used when we have the 7061 /// use_device_ptr clause. 7062 OMP_MAP_RETURN_PARAM = 0x40, 7063 /// This flag signals that the reference being passed is a pointer to 7064 /// private data. 7065 OMP_MAP_PRIVATE = 0x80, 7066 /// Pass the element to the device by value. 7067 OMP_MAP_LITERAL = 0x100, 7068 /// Implicit map 7069 OMP_MAP_IMPLICIT = 0x200, 7070 /// Close is a hint to the runtime to allocate memory close to 7071 /// the target device. 7072 OMP_MAP_CLOSE = 0x400, 7073 /// The 16 MSBs of the flags indicate whether the entry is member of some 7074 /// struct/class. 7075 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7076 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7077 }; 7078 7079 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7080 static unsigned getFlagMemberOffset() { 7081 unsigned Offset = 0; 7082 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7083 Remain = Remain >> 1) 7084 Offset++; 7085 return Offset; 7086 } 7087 7088 /// Class that associates information with a base pointer to be passed to the 7089 /// runtime library. 7090 class BasePointerInfo { 7091 /// The base pointer. 7092 llvm::Value *Ptr = nullptr; 7093 /// The base declaration that refers to this device pointer, or null if 7094 /// there is none. 
7095 const ValueDecl *DevPtrDecl = nullptr; 7096 7097 public: 7098 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7099 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7100 llvm::Value *operator*() const { return Ptr; } 7101 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7102 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7103 }; 7104 7105 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7106 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7107 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7108 7109 /// Map between a struct and the its lowest & highest elements which have been 7110 /// mapped. 7111 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7112 /// HE(FieldIndex, Pointer)} 7113 struct StructRangeInfoTy { 7114 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7115 0, Address::invalid()}; 7116 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7117 0, Address::invalid()}; 7118 Address Base = Address::invalid(); 7119 }; 7120 7121 private: 7122 /// Kind that defines how a device pointer has to be returned. 
struct MapInfo {
  /// Component list of the mapped expression.
  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
  /// Map type of the clause the components come from; OMPC_MAP_unknown until
  /// set by the constructor.
  OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
  /// Map-type modifiers attached to the clause. This is a non-owning view.
  ArrayRef<OpenMPMapModifierKind> MapModifiers;
  /// True if the device pointer has to be returned for this entry (set for
  /// entries matched by a use_device_ptr clause).
  bool ReturnDevicePointer = false;
  /// True if the map was generated implicitly.
  bool IsImplicit = false;

  MapInfo() = default;
  MapInfo(
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit)
      : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
        ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
};

/// If use_device_ptr is used on a pointer which is a struct member and there
/// is no map information about it, then emission of that entry is deferred
/// until the whole struct has been processed.
struct DeferredDevicePtrEntryTy {
  /// Expression of the deferred entry.
  /// NOTE(review): presumably the associated expression of the last
  /// use_device_ptr component - confirm against the code that creates these.
  const Expr *IE = nullptr;
  /// Declaration the deferred entry refers to.
  const ValueDecl *VD = nullptr;

  DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
      : IE(IE), VD(VD) {}
};

/// The target directive from where the mappable clauses were extracted. It
/// is either an executable directive or a user-defined mapper directive.
llvm::PointerUnion<const OMPExecutableDirective *,
                   const OMPDeclareMapperDecl *>
    CurDir;

/// Function the directive is being generated for.
CodeGenFunction &CGF;

/// Set of all first private variables in the current directive.
/// bool data is set to true if the variable is implicitly marked as
/// firstprivate, false otherwise.
llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

/// Map between device pointer declarations and their expression components.
/// The key value for declarations in 'this' is null.
7167 llvm::DenseMap< 7168 const ValueDecl *, 7169 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7170 DevPointersMap; 7171 7172 llvm::Value *getExprTypeSize(const Expr *E) const { 7173 QualType ExprTy = E->getType().getCanonicalType(); 7174 7175 // Reference types are ignored for mapping purposes. 7176 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7177 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7178 7179 // Given that an array section is considered a built-in type, we need to 7180 // do the calculation based on the length of the section instead of relying 7181 // on CGF.getTypeSize(E->getType()). 7182 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7183 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7184 OAE->getBase()->IgnoreParenImpCasts()) 7185 .getCanonicalType(); 7186 7187 // If there is no length associated with the expression and lower bound is 7188 // not specified too, that means we are using the whole length of the 7189 // base. 7190 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7191 !OAE->getLowerBound()) 7192 return CGF.getTypeSize(BaseTy); 7193 7194 llvm::Value *ElemSize; 7195 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7196 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7197 } else { 7198 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7199 assert(ATy && "Expecting array type if not a pointer type."); 7200 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7201 } 7202 7203 // If we don't have a length at this point, that is because we have an 7204 // array section with a single element. 
7205 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7206 return ElemSize; 7207 7208 if (const Expr *LenExpr = OAE->getLength()) { 7209 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7210 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7211 CGF.getContext().getSizeType(), 7212 LenExpr->getExprLoc()); 7213 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7214 } 7215 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7216 OAE->getLowerBound() && "expected array_section[lb:]."); 7217 // Size = sizetype - lb * elemtype; 7218 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7219 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7220 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7221 CGF.getContext().getSizeType(), 7222 OAE->getLowerBound()->getExprLoc()); 7223 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7224 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7225 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7226 LengthVal = CGF.Builder.CreateSelect( 7227 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7228 return LengthVal; 7229 } 7230 return CGF.getTypeSize(ExprTy); 7231 } 7232 7233 /// Return the corresponding bits for a given map clause modifier. Add 7234 /// a flag marking the map as a pointer if requested. Add a flag marking the 7235 /// map as the first one of a series of maps that relate to the same map 7236 /// expression. 7237 OpenMPOffloadMappingFlags getMapTypeBits( 7238 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7239 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7240 OpenMPOffloadMappingFlags Bits = 7241 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7242 switch (MapType) { 7243 case OMPC_MAP_alloc: 7244 case OMPC_MAP_release: 7245 // alloc and release is the default behavior in the runtime library, i.e. 
7246 // if we don't pass any bits alloc/release that is what the runtime is 7247 // going to do. Therefore, we don't need to signal anything for these two 7248 // type modifiers. 7249 break; 7250 case OMPC_MAP_to: 7251 Bits |= OMP_MAP_TO; 7252 break; 7253 case OMPC_MAP_from: 7254 Bits |= OMP_MAP_FROM; 7255 break; 7256 case OMPC_MAP_tofrom: 7257 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7258 break; 7259 case OMPC_MAP_delete: 7260 Bits |= OMP_MAP_DELETE; 7261 break; 7262 case OMPC_MAP_unknown: 7263 llvm_unreachable("Unexpected map type!"); 7264 } 7265 if (AddPtrFlag) 7266 Bits |= OMP_MAP_PTR_AND_OBJ; 7267 if (AddIsTargetParamFlag) 7268 Bits |= OMP_MAP_TARGET_PARAM; 7269 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7270 != MapModifiers.end()) 7271 Bits |= OMP_MAP_ALWAYS; 7272 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7273 != MapModifiers.end()) 7274 Bits |= OMP_MAP_CLOSE; 7275 return Bits; 7276 } 7277 7278 /// Return true if the provided expression is a final array section. A 7279 /// final array section, is one whose length can't be proved to be one. 7280 bool isFinalArraySectionExpression(const Expr *E) const { 7281 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7282 7283 // It is not an array section and therefore not a unity-size one. 7284 if (!OASE) 7285 return false; 7286 7287 // An array section with no colon always refer to a single element. 7288 if (OASE->getColonLoc().isInvalid()) 7289 return false; 7290 7291 const Expr *Length = OASE->getLength(); 7292 7293 // If we don't have a length we have to check if the array has size 1 7294 // for this dimension. Also, we should always expect a length if the 7295 // base type is pointer. 
7296 if (!Length) { 7297 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7298 OASE->getBase()->IgnoreParenImpCasts()) 7299 .getCanonicalType(); 7300 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7301 return ATy->getSize().getSExtValue() != 1; 7302 // If we don't have a constant dimension length, we have to consider 7303 // the current section as having any size, so it is not necessarily 7304 // unitary. If it happen to be unity size, that's user fault. 7305 return true; 7306 } 7307 7308 // Check if the length evaluates to 1. 7309 Expr::EvalResult Result; 7310 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7311 return true; // Can have more that size 1. 7312 7313 llvm::APSInt ConstLength = Result.Val.getInt(); 7314 return ConstLength.getSExtValue() != 1; 7315 } 7316 7317 /// Generate the base pointers, section pointers, sizes and map type 7318 /// bits for the provided map type, map modifier, and expression components. 7319 /// \a IsFirstComponent should be set to true if the provided set of 7320 /// components is the first associated with a capture. 7321 void generateInfoForComponentList( 7322 OpenMPMapClauseKind MapType, 7323 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7324 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7325 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7326 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7327 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7328 bool IsImplicit, 7329 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7330 OverlappedElements = llvm::None) const { 7331 // The following summarizes what has to be generated for each map and the 7332 // types below. The generated information is expressed in this order: 7333 // base pointer, section pointer, size, flags 7334 // (to add to the ones that come from the map type and modifier). 
7335 // 7336 // double d; 7337 // int i[100]; 7338 // float *p; 7339 // 7340 // struct S1 { 7341 // int i; 7342 // float f[50]; 7343 // } 7344 // struct S2 { 7345 // int i; 7346 // float f[50]; 7347 // S1 s; 7348 // double *p; 7349 // struct S2 *ps; 7350 // } 7351 // S2 s; 7352 // S2 *ps; 7353 // 7354 // map(d) 7355 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7356 // 7357 // map(i) 7358 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7359 // 7360 // map(i[1:23]) 7361 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7362 // 7363 // map(p) 7364 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7365 // 7366 // map(p[1:24]) 7367 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7368 // 7369 // map(s) 7370 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7371 // 7372 // map(s.i) 7373 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7374 // 7375 // map(s.s.f) 7376 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7377 // 7378 // map(s.p) 7379 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7380 // 7381 // map(to: s.p[:22]) 7382 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7383 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7384 // &(s.p), &(s.p[0]), 22*sizeof(double), 7385 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7386 // (*) alloc space for struct members, only this is a target parameter 7387 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7388 // optimizes this entry out, same in the examples below) 7389 // (***) map the pointee (map: to) 7390 // 7391 // map(s.ps) 7392 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7393 // 7394 // map(from: s.ps->s.i) 7395 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7396 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7397 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7398 // 7399 // map(to: s.ps->ps) 7400 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7401 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7402 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7403 // 7404 // map(s.ps->ps->ps) 7405 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7406 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7407 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7408 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7409 // 7410 // map(to: s.ps->ps->s.f[:22]) 7411 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7412 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7413 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7414 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7415 // 7416 // map(ps) 7417 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7418 // 7419 // map(ps->i) 7420 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7421 // 7422 // map(ps->s.f) 7423 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7424 // 7425 // map(from: ps->p) 7426 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7427 // 7428 // map(to: ps->p[:22]) 7429 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7430 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7431 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7432 // 7433 // map(ps->ps) 7434 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7435 // 7436 // map(from: ps->ps->s.i) 7437 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7438 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7439 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7440 // 7441 // map(from: ps->ps->ps) 7442 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7443 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7444 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7445 // 7446 // map(ps->ps->ps->ps) 7447 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7448 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7449 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7450 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7451 // 7452 // map(to: ps->ps->ps->s.f[:22]) 7453 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7454 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7455 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7456 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7457 // 7458 // map(to: s.f[:22]) map(from: s.p[:33]) 7459 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7460 // sizeof(double*) (**), TARGET_PARAM 7461 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7462 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7463 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7464 // (*) allocate contiguous space needed to fit all mapped members even if 7465 // we allocate space for members not mapped (in this example, 7466 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7467 // them as well because they fall between &s.f[0] and &s.p) 7468 // 7469 // map(from: s.f[:22]) map(to: ps->p[:33]) 7470 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7471 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7472 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7473 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7474 // (*) the struct this entry pertains to is the 2nd element in the list of 7475 // arguments, hence MEMBER_OF(2) 7476 // 7477 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7478 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7479 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7480 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7481 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7482 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7483 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7484 // (*) the struct this entry pertains to is the 4th element in the list 7485 // of arguments, hence MEMBER_OF(4) 7486 7487 // Track if the map information being generated is the first for a 
capture. 7488 bool IsCaptureFirstInfo = IsFirstComponentList; 7489 // When the variable is on a declare target link or in a to clause with 7490 // unified memory, a reference is needed to hold the host/device address 7491 // of the variable. 7492 bool RequiresReference = false; 7493 7494 // Scan the components from the base to the complete expression. 7495 auto CI = Components.rbegin(); 7496 auto CE = Components.rend(); 7497 auto I = CI; 7498 7499 // Track if the map information being generated is the first for a list of 7500 // components. 7501 bool IsExpressionFirstInfo = true; 7502 Address BP = Address::invalid(); 7503 const Expr *AssocExpr = I->getAssociatedExpression(); 7504 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7505 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7506 7507 if (isa<MemberExpr>(AssocExpr)) { 7508 // The base is the 'this' pointer. The content of the pointer is going 7509 // to be the base of the field being mapped. 7510 BP = CGF.LoadCXXThisAddress(); 7511 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7512 (OASE && 7513 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7514 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7515 } else { 7516 // The base is the reference to the variable. 7517 // BP = &Var. 7518 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7519 if (const auto *VD = 7520 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7521 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7522 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7523 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7524 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7525 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7526 RequiresReference = true; 7527 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7528 } 7529 } 7530 } 7531 7532 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7533 // the last component), the base has to be the pointer itself, not its 7534 // reference. References are ignored for mapping purposes. 7535 QualType Ty = 7536 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7537 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7538 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7539 7540 // We do not need to generate individual map information for the 7541 // pointer, it can be associated with the combined storage. 7542 ++I; 7543 } 7544 } 7545 7546 // Track whether a component of the list should be marked as MEMBER_OF some 7547 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7548 // in a component list should be marked as MEMBER_OF, all subsequent entries 7549 // do not belong to the base struct. E.g. 7550 // struct S2 s; 7551 // s.ps->ps->ps->f[:] 7552 // (1) (2) (3) (4) 7553 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7554 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7555 // is the pointee of ps(2) which is not member of struct s, so it should not 7556 // be marked as such (it is still PTR_AND_OBJ). 7557 // The variable is initialized to false so that PTR_AND_OBJ entries which 7558 // are not struct members are not considered (e.g. array of pointers to 7559 // data). 7560 bool ShouldBeMemberOf = false; 7561 7562 // Variable keeping track of whether or not we have encountered a component 7563 // in the component list which is a member expression. Useful when we have a 7564 // pointer or a final array section, in which case it is the previous 7565 // component in the list which tells us whether we have a member expression. 7566 // E.g. X.f[:] 7567 // While processing the final array section "[:]" it is "f" which tells us 7568 // whether we are dealing with a member of a declared struct. 
7569 const MemberExpr *EncounteredME = nullptr; 7570 7571 for (; I != CE; ++I) { 7572 // If the current component is member of a struct (parent struct) mark it. 7573 if (!EncounteredME) { 7574 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7575 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7576 // as MEMBER_OF the parent struct. 7577 if (EncounteredME) 7578 ShouldBeMemberOf = true; 7579 } 7580 7581 auto Next = std::next(I); 7582 7583 // We need to generate the addresses and sizes if this is the last 7584 // component, if the component is a pointer or if it is an array section 7585 // whose length can't be proved to be one. If this is a pointer, it 7586 // becomes the base address for the following components. 7587 7588 // A final array section, is one whose length can't be proved to be one. 7589 bool IsFinalArraySection = 7590 isFinalArraySectionExpression(I->getAssociatedExpression()); 7591 7592 // Get information on whether the element is a pointer. Have to do a 7593 // special treatment for array sections given that they are built-in 7594 // types. 7595 const auto *OASE = 7596 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7597 bool IsPointer = 7598 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7599 .getCanonicalType() 7600 ->isAnyPointerType()) || 7601 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7602 7603 if (Next == CE || IsPointer || IsFinalArraySection) { 7604 // If this is not the last component, we expect the pointer to be 7605 // associated with an array expression or member expression. 
7606 assert((Next == CE || 7607 isa<MemberExpr>(Next->getAssociatedExpression()) || 7608 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7609 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7610 "Unexpected expression"); 7611 7612 Address LB = 7613 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 7614 7615 // If this component is a pointer inside the base struct then we don't 7616 // need to create any entry for it - it will be combined with the object 7617 // it is pointing to into a single PTR_AND_OBJ entry. 7618 bool IsMemberPointer = 7619 IsPointer && EncounteredME && 7620 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7621 EncounteredME); 7622 if (!OverlappedElements.empty()) { 7623 // Handle base element with the info for overlapped elements. 7624 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7625 assert(Next == CE && 7626 "Expected last element for the overlapped elements."); 7627 assert(!IsPointer && 7628 "Unexpected base element with the pointer type."); 7629 // Mark the whole struct as the struct that requires allocation on the 7630 // device. 7631 PartialStruct.LowestElem = {0, LB}; 7632 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7633 I->getAssociatedExpression()->getType()); 7634 Address HB = CGF.Builder.CreateConstGEP( 7635 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7636 CGF.VoidPtrTy), 7637 TypeSize.getQuantity() - 1); 7638 PartialStruct.HighestElem = { 7639 std::numeric_limits<decltype( 7640 PartialStruct.HighestElem.first)>::max(), 7641 HB}; 7642 PartialStruct.Base = BP; 7643 // Emit data for non-overlapped data. 7644 OpenMPOffloadMappingFlags Flags = 7645 OMP_MAP_MEMBER_OF | 7646 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7647 /*AddPtrFlag=*/false, 7648 /*AddIsTargetParamFlag=*/false); 7649 LB = BP; 7650 llvm::Value *Size = nullptr; 7651 // Do bitcopy of all non-overlapped structure elements. 
7652 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7653 Component : OverlappedElements) { 7654 Address ComponentLB = Address::invalid(); 7655 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7656 Component) { 7657 if (MC.getAssociatedDeclaration()) { 7658 ComponentLB = 7659 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7660 .getAddress(); 7661 Size = CGF.Builder.CreatePtrDiff( 7662 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7663 CGF.EmitCastToVoidPtr(LB.getPointer())); 7664 break; 7665 } 7666 } 7667 BasePointers.push_back(BP.getPointer()); 7668 Pointers.push_back(LB.getPointer()); 7669 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7670 /*isSigned=*/true)); 7671 Types.push_back(Flags); 7672 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7673 } 7674 BasePointers.push_back(BP.getPointer()); 7675 Pointers.push_back(LB.getPointer()); 7676 Size = CGF.Builder.CreatePtrDiff( 7677 CGF.EmitCastToVoidPtr( 7678 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7679 CGF.EmitCastToVoidPtr(LB.getPointer())); 7680 Sizes.push_back( 7681 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7682 Types.push_back(Flags); 7683 break; 7684 } 7685 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7686 if (!IsMemberPointer) { 7687 BasePointers.push_back(BP.getPointer()); 7688 Pointers.push_back(LB.getPointer()); 7689 Sizes.push_back( 7690 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7691 7692 // We need to add a pointer flag for each map that comes from the 7693 // same expression except for the first one. We also need to signal 7694 // this map is the first one that relates with the current capture 7695 // (there is a set of entries for each capture). 
7696 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7697 MapType, MapModifiers, IsImplicit, 7698 !IsExpressionFirstInfo || RequiresReference, 7699 IsCaptureFirstInfo && !RequiresReference); 7700 7701 if (!IsExpressionFirstInfo) { 7702 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7703 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7704 if (IsPointer) 7705 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7706 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7707 7708 if (ShouldBeMemberOf) { 7709 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7710 // should be later updated with the correct value of MEMBER_OF. 7711 Flags |= OMP_MAP_MEMBER_OF; 7712 // From now on, all subsequent PTR_AND_OBJ entries should not be 7713 // marked as MEMBER_OF. 7714 ShouldBeMemberOf = false; 7715 } 7716 } 7717 7718 Types.push_back(Flags); 7719 } 7720 7721 // If we have encountered a member expression so far, keep track of the 7722 // mapped member. If the parent is "*this", then the value declaration 7723 // is nullptr. 7724 if (EncounteredME) { 7725 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7726 unsigned FieldIndex = FD->getFieldIndex(); 7727 7728 // Update info about the lowest and highest elements for this struct 7729 if (!PartialStruct.Base.isValid()) { 7730 PartialStruct.LowestElem = {FieldIndex, LB}; 7731 PartialStruct.HighestElem = {FieldIndex, LB}; 7732 PartialStruct.Base = BP; 7733 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7734 PartialStruct.LowestElem = {FieldIndex, LB}; 7735 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7736 PartialStruct.HighestElem = {FieldIndex, LB}; 7737 } 7738 } 7739 7740 // If we have a final array section, we are done with this expression. 7741 if (IsFinalArraySection) 7742 break; 7743 7744 // The pointer becomes the base for the next element. 
7745 if (Next != CE) 7746 BP = LB; 7747 7748 IsExpressionFirstInfo = false; 7749 IsCaptureFirstInfo = false; 7750 } 7751 } 7752 } 7753 7754 /// Return the adjusted map modifiers if the declaration a capture refers to 7755 /// appears in a first-private clause. This is expected to be used only with 7756 /// directives that start with 'target'. 7757 MappableExprsHandler::OpenMPOffloadMappingFlags 7758 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7759 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7760 7761 // A first private variable captured by reference will use only the 7762 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7763 // declaration is known as first-private in this handler. 7764 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7765 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7766 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7767 return MappableExprsHandler::OMP_MAP_ALWAYS | 7768 MappableExprsHandler::OMP_MAP_TO; 7769 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7770 return MappableExprsHandler::OMP_MAP_TO | 7771 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7772 return MappableExprsHandler::OMP_MAP_PRIVATE | 7773 MappableExprsHandler::OMP_MAP_TO; 7774 } 7775 return MappableExprsHandler::OMP_MAP_TO | 7776 MappableExprsHandler::OMP_MAP_FROM; 7777 } 7778 7779 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7780 // Rotate by getFlagMemberOffset() bits. 7781 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7782 << getFlagMemberOffset()); 7783 } 7784 7785 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7786 OpenMPOffloadMappingFlags MemberOfFlag) { 7787 // If the entry is PTR_AND_OBJ but has not been marked with the special 7788 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7789 // marked as MEMBER_OF. 
7790 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7791 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7792 return; 7793 7794 // Reset the placeholder value to prepare the flag for the assignment of the 7795 // proper MEMBER_OF value. 7796 Flags &= ~OMP_MAP_MEMBER_OF; 7797 Flags |= MemberOfFlag; 7798 } 7799 7800 void getPlainLayout(const CXXRecordDecl *RD, 7801 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7802 bool AsBase) const { 7803 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7804 7805 llvm::StructType *St = 7806 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7807 7808 unsigned NumElements = St->getNumElements(); 7809 llvm::SmallVector< 7810 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7811 RecordLayout(NumElements); 7812 7813 // Fill bases. 7814 for (const auto &I : RD->bases()) { 7815 if (I.isVirtual()) 7816 continue; 7817 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7818 // Ignore empty bases. 7819 if (Base->isEmpty() || CGF.getContext() 7820 .getASTRecordLayout(Base) 7821 .getNonVirtualSize() 7822 .isZero()) 7823 continue; 7824 7825 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7826 RecordLayout[FieldIndex] = Base; 7827 } 7828 // Fill in virtual bases. 7829 for (const auto &I : RD->vbases()) { 7830 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7831 // Ignore empty bases. 7832 if (Base->isEmpty()) 7833 continue; 7834 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7835 if (RecordLayout[FieldIndex]) 7836 continue; 7837 RecordLayout[FieldIndex] = Base; 7838 } 7839 // Fill in all the fields. 7840 assert(!RD->isUnion() && "Unexpected union."); 7841 for (const auto *Field : RD->fields()) { 7842 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7843 // will fill in later.) 
7844 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7845 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7846 RecordLayout[FieldIndex] = Field; 7847 } 7848 } 7849 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7850 &Data : RecordLayout) { 7851 if (Data.isNull()) 7852 continue; 7853 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7854 getPlainLayout(Base, Layout, /*AsBase=*/true); 7855 else 7856 Layout.push_back(Data.get<const FieldDecl *>()); 7857 } 7858 } 7859 7860 public: 7861 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7862 : CurDir(&Dir), CGF(CGF) { 7863 // Extract firstprivate clause information. 7864 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7865 for (const auto *D : C->varlists()) 7866 FirstPrivateDecls.try_emplace( 7867 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7868 // Extract device pointer clause information. 7869 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7870 for (auto L : C->component_lists()) 7871 DevPointersMap[L.first].push_back(L.second); 7872 } 7873 7874 /// Constructor for the declare mapper directive. 7875 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7876 : CurDir(&Dir), CGF(CGF) {} 7877 7878 /// Generate code for the combined entry if we have a partially mapped struct 7879 /// and take care of the mapping flags of the arguments corresponding to 7880 /// individual struct members. 
7881 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7882 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7883 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7884 const StructRangeInfoTy &PartialStruct) const { 7885 // Base is the base of the struct 7886 BasePointers.push_back(PartialStruct.Base.getPointer()); 7887 // Pointer is the address of the lowest element 7888 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7889 Pointers.push_back(LB); 7890 // Size is (addr of {highest+1} element) - (addr of lowest element) 7891 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7892 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7893 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7894 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7895 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7896 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7897 /*isSigned=*/false); 7898 Sizes.push_back(Size); 7899 // Map type is always TARGET_PARAM 7900 Types.push_back(OMP_MAP_TARGET_PARAM); 7901 // Remove TARGET_PARAM flag from the first element 7902 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7903 7904 // All other current entries will be MEMBER_OF the combined entry 7905 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7906 // 0xFFFF in the MEMBER_OF field). 7907 OpenMPOffloadMappingFlags MemberOfFlag = 7908 getMemberOfFlag(BasePointers.size() - 1); 7909 for (auto &M : CurTypes) 7910 setCorrectMemberOfFlag(M, MemberOfFlag); 7911 } 7912 7913 /// Generate all the base pointers, section pointers, sizes and map 7914 /// types for the extracted mappable expressions. Also, for each item that 7915 /// relates with a device pointer, a pair of the relevant declaration and 7916 /// index where it occurs is appended to the device pointers info array. 
void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
                     MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
                     MapFlagsArrayTy &Types) const {
  // Overview: collect the component lists of all map/to/from clauses of the
  // current executable directive, fold in the use_device_ptr clauses, and
  // then emit the entries declaration by declaration, appending the results
  // to the four output arrays.

  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. Entries are keyed by the canonical declaration; a null
  // declaration is kept as a null key.
  auto &&InfoGen = [&Info](
      const ValueDecl *D,
      OMPClauseMappableExprCommon::MappableExprComponentListRef L,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit) {
    const ValueDecl *VD =
        D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
    Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                          IsImplicit);
  };

  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  // 'map' clauses carry their own map type and modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
    for (const auto &L : C->component_lists()) {
      InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }
  // 'to' clauses are recorded with map type 'to' and no modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
    for (const auto &L : C->component_lists()) {
      InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }
  // 'from' clauses are recorded with map type 'from' and no modifiers.
  for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
    for (const auto &L : C->component_lists()) {
      InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
          /*ReturnDevicePointer=*/false, C->isImplicit());
    }

  // Look at the use_device_ptr clause information and mark the existing map
  // entries as such. If there is no map information for an entry in the
  // use_device_ptr list, we create one with map type 'alloc' and zero size
  // section. It is the user's fault if that was not mapped before. If there
  // is no map information and the pointer is a struct member, then we defer
  // the emission of that entry until the whole struct has been processed.
  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
      DeferredInfo;

  for (const auto *C :
       CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
    for (const auto &L : C->component_lists()) {
      assert(!L.second.empty() && "Not expecting empty list of components!");
      const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
      VD = cast<ValueDecl>(VD->getCanonicalDecl());
      const Expr *IE = L.second.back().getAssociatedExpression();
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);

      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it.
      if (It != Info.end()) {
        auto CI = std::find_if(
            It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
              return MI.Components.back().getAssociatedDeclaration() == VD;
            });
        // If we found a map entry, signal that the pointer has to be returned
        // and move on to the next declaration.
        if (CI != It->second.end()) {
          CI->ReturnDevicePointer = true;
          continue;
        }
      }

      // We didn't find any match in our map information - generate a zero
      // size array section - if the pointer is a struct member we defer this
      // action until the whole struct has been processed.
      if (isa<MemberExpr>(IE)) {
        // Insert the pointer into Info to be processed by
        // generateInfoForComponentList. Because it is a member pointer
        // without a pointee, no entry will be generated for it, therefore
        // we need to generate one after the whole struct has been processed.
        // Nonetheless, generateInfoForComponentList must be called to take
        // the pointer into account for the calculation of the range of the
        // partial struct.
        InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit());
        DeferredInfo[nullptr].emplace_back(IE, VD);
      } else {
        // Not a struct member: emit the zero-size entry right away, directly
        // into the output arrays. The loaded pointer value serves as both
        // base pointer and pointer.
        llvm::Value *Ptr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
        BasePointers.emplace_back(Ptr, VD);
        Pointers.push_back(Ptr);
        Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
        Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
      }
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary versions of arrays; they are appended to the output arrays
    // once this declaration has been fully processed.
    MapBaseValuesArrayTy CurBasePointers;
    MapValuesArrayTy CurPointers;
    MapValuesArrayTy CurSizes;
    MapFlagsArrayTy CurTypes;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");

      // Remember the current base pointer index.
      unsigned CurrentBasePointersIdx = CurBasePointers.size();
      generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                   CurBasePointers, CurPointers, CurSizes,
                                   CurTypes, PartialStruct,
                                   IsFirstComponentList, L.IsImplicit);

      // If this entry relates with a device pointer, set the relevant
      // declaration and add the 'return pointer' flag.
      if (L.ReturnDevicePointer) {
        assert(CurBasePointers.size() > CurrentBasePointersIdx &&
               "Unexpected number of mapped base pointers.");

        const ValueDecl *RelevantVD =
            L.Components.back().getAssociatedDeclaration();
        assert(RelevantVD &&
               "No relevant declaration related with device pointer??");

        CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
        CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
      }
      IsFirstComponentList = false;
    }

    // Append any pending zero-length pointers which are struct members and
    // used with use_device_ptr.
    auto CI = DeferredInfo.find(M.first);
    if (CI != DeferredInfo.end()) {
      for (const DeferredDevicePtrEntryTy &L : CI->second) {
        // NOTE(review): L.IE is emitted as an lvalue twice, once for the base
        // pointer and once for the load. Presumably intentional; merging the
        // two calls would change the emitted IR - confirm before touching.
        llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
        llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
            this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
        CurBasePointers.emplace_back(BasePtr, L.VD);
        CurPointers.push_back(Ptr);
        CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
        // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
        // value MEMBER_OF=FFFF so that the entry is later updated with the
        // correct value of MEMBER_OF.
        CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                           OMP_MAP_MEMBER_OF);
      }
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry. It goes
    // straight into the output arrays, i.e. ahead of the entries of this
    // declaration that are appended below.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                        PartialStruct);

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    Types.append(CurTypes.begin(), CurTypes.end());
  }
}

/// Generate all the base pointers, section pointers, sizes and map types for
/// the extracted map clauses of user-defined mapper.
void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
                              MapValuesArrayTy &Pointers,
                              MapValuesArrayTy &Sizes,
                              MapFlagsArrayTy &Types) const {
  assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
         "Expect a declare mapper directive");
  const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
  // We have to process the component lists that relate with the same
  // declaration in a single chunk so that we can generate the map flags
  // correctly. Therefore, we organize all lists in a map.
  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;

  // Helper function to fill the information map for the different supported
  // clauses. Entries are keyed by the canonical declaration; a null
  // declaration is kept as a null key.
  auto &&InfoGen = [&Info](
      const ValueDecl *D,
      OMPClauseMappableExprCommon::MappableExprComponentListRef L,
      OpenMPMapClauseKind MapType,
      ArrayRef<OpenMPMapModifierKind> MapModifiers,
      bool ReturnDevicePointer, bool IsImplicit) {
    const ValueDecl *VD =
        D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
    Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
                          IsImplicit);
  };

  // Every clause of the mapper is cast to a map clause; cast<> asserts if a
  // clause of any other kind is present.
  for (const auto *C : CurMapperDir->clauselists()) {
    const auto *MC = cast<OMPMapClause>(C);
    for (const auto &L : MC->component_lists()) {
      InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
              /*ReturnDevicePointer=*/false, MC->isImplicit());
    }
  }

  for (const auto &M : Info) {
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    bool IsFirstComponentList = true;

    // Temporary versions of arrays; they are appended to the output arrays
    // once this declaration has been fully processed.
    MapBaseValuesArrayTy CurBasePointers;
    MapValuesArrayTy CurPointers;
    MapValuesArrayTy CurSizes;
    MapFlagsArrayTy CurTypes;
    StructRangeInfoTy PartialStruct;

    for (const MapInfo &L : M.second) {
      assert(!L.Components.empty() &&
             "Not expecting declaration with no component lists.");
      generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components,
                                   CurBasePointers, CurPointers, CurSizes,
                                   CurTypes, PartialStruct,
                                   IsFirstComponentList, L.IsImplicit);
      IsFirstComponentList = false;
    }

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid())
      emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
                        PartialStruct);

    // We need to append the results of this capture to what we already have.
    BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
    Pointers.append(CurPointers.begin(), CurPointers.end());
    Sizes.append(CurSizes.begin(), CurSizes.end());
    Types.append(CurTypes.begin(), CurTypes.end());
  }
}

/// Emit capture info for lambdas for variables captured by reference.
void generateInfoForLambdaCaptures(
    const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
    MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
    MapFlagsArrayTy &Types,
    llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
  // Nothing to do unless the (non-reference, canonical) type of VD is a
  // lambda closure type.
  const auto *RD = VD->getType()
                       .getCanonicalType()
                       .getNonReferenceType()
                       ->getAsCXXRecordDecl();
  if (!RD || !RD->isLambda())
    return;
  // Treat Arg as the address of the lambda object.
  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
  LValue VDLVal = CGF.MakeAddrLValue(
      VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
  FieldDecl *ThisCapture = nullptr;
  RD->getCaptureFields(Captures, ThisCapture);
  // Captured 'this': the base pointer is the lvalue of the capture field
  // obtained for initialization, the pointer is the one obtained through
  // EmitLValueForField, and the size is that of a void pointer.
  if (ThisCapture) {
    LValue ThisLVal =
        CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
    LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
    // Remember which lambda object this field belongs to.
    LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
    BasePointers.push_back(ThisLVal.getPointer());
    Pointers.push_back(ThisLValVal.getPointer());
    Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
  for (const LambdaCapture &LC : RD->captures()) {
    if (!LC.capturesVariable())
      continue;
    // Note: this VD shadows the function parameter of the same name.
    const VarDecl *VD = LC.getCapturedVar();
    // Only by-reference captures and captures of pointer type get an entry.
    if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
      continue;
    auto It = Captures.find(VD);
    assert(It != Captures.end() && "Found lambda capture without field.");
    LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
    if (LC.getCaptureKind() == LCK_ByRef) {
      // By-reference capture: the size is that of the captured variable's
      // non-reference type.
      LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarLValVal.getPointer());
      Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(
              VD->getType().getCanonicalType().getNonReferenceType()),
          CGF.Int64Ty, /*isSigned=*/true));
    } else {
      // Pointer captured by copy: the pointer is the value loaded from the
      // capture field and the entry has size zero.
      RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
      LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
      BasePointers.push_back(VarLVal.getPointer());
      Pointers.push_back(VarRVal.getScalarVal());
      Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
    }
    Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                    OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
  }
}

/// Set correct indices for lambdas captures.
void adjustMemberOfForLambdaCaptures(
    const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
    MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
    MapFlagsArrayTy &Types) const {
  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
    // Set correct member_of idx for all implicit lambda captures. Only
    // entries whose flags are exactly the combination pushed by
    // generateInfoForLambdaCaptures are touched.
    if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                     OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
      continue;
    // Address of the lambda object this capture was recorded for.
    llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
    assert(BasePtr && "Unable to find base lambda address.");
    // Search backwards from I for the nearest earlier entry whose pointer is
    // the lambda object itself.
    int TgtIdx = -1;
    for (unsigned J = I; J > 0; --J) {
      unsigned Idx = J - 1;
      if (Pointers[Idx] != BasePtr)
        continue;
      TgtIdx = Idx;
      break;
    }
    assert(TgtIdx != -1 && "Unable to find parent lambda.");
    // Update the MEMBER_OF field of this entry using the index of the parent
    // lambda entry found above.
    OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
    setCorrectMemberOfFlag(Types[I], MemberOfFlag);
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated to a given capture.
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                            llvm::Value *Arg,
                            MapBaseValuesArrayTy &BasePointers,
                            MapValuesArrayTy &Pointers,
                            MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
                            StructRangeInfoTy &PartialStruct) const {
  assert(!Cap->capturesVariableArrayType() &&
         "Not expecting to generate map info for a variable array type!");

  // Canonical declaration of the captured variable, or null when the capture
  // is 'this'.
  const ValueDecl *VD = Cap->capturesThis()
                            ? nullptr
                            : Cap->getCapturedVar()->getCanonicalDecl();

  // If this declaration appears in a is_device_ptr clause we just have to
  // pass the pointer by value. If it is a reference to a declaration, we just
  // pass its value.
  if (DevPointersMap.count(VD)) {
    BasePointers.emplace_back(Arg, VD);
    Pointers.push_back(Arg);
    Sizes.push_back(
        CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                  CGF.Int64Ty, /*isSigned=*/true));
    Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
    return;
  }

  // One record per component list that refers to VD: the components, the map
  // type, the map type modifiers and whether the clause is implicit.
  using MapData =
      std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
  SmallVector<MapData, 4> DeclComponentLists;
  assert(CurDir.is<const OMPExecutableDirective *>() &&
         "Expect a executable directive");
  const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
  for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
    for (const auto &L : C->decl_component_lists(VD)) {
      assert(L.first == VD &&
             "We got information for the wrong declaration??");
      assert(!L.second.empty() &&
             "Not expecting declaration with no component lists.");
      DeclComponentLists.emplace_back(L.second, C->getMapType(),
                                      C->getMapTypeModifiers(),
                                      C->isImplicit());
    }
  }

  // Find overlapping elements (including the offset from the base element).
  // The map is keyed by the address of the base record inside
  // DeclComponentLists, which is not modified from here on.
  llvm::SmallDenseMap<
      const MapData *,
      llvm::SmallVector<
          OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
      4>
      OverlappedData;
  size_t Count = 0;
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ++Count;
    // Compare L against every list that follows it.
    for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      // Only Components1 is used below; the other tied variables are simply
      // overwritten.
      std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
      // Walk both lists in reverse, in lockstep, for as long as the
      // components agree in expression class and associated declaration.
      auto CI = Components.rbegin();
      auto CE = Components.rend();
      auto SI = Components1.rbegin();
      auto SE = Components1.rend();
      for (; CI != CE && SI != SE; ++CI, ++SI) {
        if (CI->getAssociatedExpression()->getStmtClass() !=
            SI->getAssociatedExpression()->getStmtClass())
          break;
        // Are we dealing with different variables/fields?
        if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
          break;
      }
      // Found overlapping if, at least for one component, reached the head of
      // the components list.
      if (CI == CE || SI == SE) {
        assert((CI != CE || SI != SE) &&
               "Unexpected full match of the mapping components.");
        // The list that was exhausted first is the base; the other one is
        // recorded as its overlapped sub-element.
        const MapData &BaseData = CI == CE ? L : L1;
        OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
            SI == SE ? Components : Components1;
        auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
        OverlappedElements.getSecond().push_back(SubData);
      }
    }
  }
  // Sort the overlapped elements for each item.
  // NOTE(review): VD is null when 'this' is captured; VD->getType() below
  // would dereference it if overlapping lists exist in that case - confirm
  // this combination cannot occur.
  llvm::SmallVector<const FieldDecl *, 4> Layout;
  if (!OverlappedData.empty()) {
    if (const auto *CRD =
            VD->getType().getCanonicalType()->getAsCXXRecordDecl())
      getPlainLayout(CRD, Layout, /*AsBase=*/false);
    else {
      const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
      Layout.append(RD->field_begin(), RD->field_end());
    }
  }
  for (auto &Pair : OverlappedData) {
    llvm::sort(
        Pair.getSecond(),
        [&Layout](
            OMPClauseMappableExprCommon::MappableExprComponentListRef First,
            OMPClauseMappableExprCommon::MappableExprComponentListRef
                Second) {
          // Skip the common part of both lists, walking in reverse.
          auto CI = First.rbegin();
          auto CE = First.rend();
          auto SI = Second.rbegin();
          auto SE = Second.rend();
          for (; CI != CE && SI != SE; ++CI, ++SI) {
            if (CI->getAssociatedExpression()->getStmtClass() !=
                SI->getAssociatedExpression()->getStmtClass())
              break;
            // Are we dealing with different variables/fields?
            if (CI->getAssociatedDeclaration() !=
                SI->getAssociatedDeclaration())
              break;
          }

          // Lists contain the same elements.
          if (CI == CE && SI == SE)
            return false;

          // List with less elements is less than list with more elements.
          if (CI == CE || SI == SE)
            return CI == CE;

          // The lists differ at a field: fields of the same record are
          // ordered by field index, otherwise by which of the two appears
          // first in Layout.
          const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
          const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
          if (FD1->getParent() == FD2->getParent())
            return FD1->getFieldIndex() < FD2->getFieldIndex();
          // NOTE(review): It is dereferenced without comparing it against
          // Layout.end(); presumably one of the two fields is always in
          // Layout - confirm.
          const auto It =
              llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                return FD == FD1 || FD == FD2;
              });
          return *It == FD1;
        });
  }

  // Go through all of the elements with the overlapped elements. Each of
  // them is generated as a first component list, because the mapping flags
  // depend on it.
  for (const auto &Pair : OverlappedData) {
    const MapData &L = *Pair.getFirst();
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
        OverlappedComponents = Pair.getSecond();
    bool IsFirstComponentList = true;
    generateInfoForComponentList(MapType, MapModifiers, Components,
                                 BasePointers, Pointers, Sizes, Types,
                                 PartialStruct, IsFirstComponentList,
                                 IsImplicit, OverlappedComponents);
  }
  // Go through other elements without overlapped elements. The first of them
  // counts as the first component list only if nothing was generated above.
  bool IsFirstComponentList = OverlappedData.empty();
  for (const MapData &L : DeclComponentLists) {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool IsImplicit;
    std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
    // Lists that acted as a base above have already been generated.
    auto It = OverlappedData.find(&L);
    if (It == OverlappedData.end())
      generateInfoForComponentList(MapType, MapModifiers, Components,
                                   BasePointers, Pointers, Sizes, Types,
                                   PartialStruct, IsFirstComponentList,
                                   IsImplicit);
    IsFirstComponentList = false;
  }
}

/// Generate the base pointers, section pointers, sizes and map types
/// associated with the declare target link variables.
8434 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8435 MapValuesArrayTy &Pointers, 8436 MapValuesArrayTy &Sizes, 8437 MapFlagsArrayTy &Types) const { 8438 assert(CurDir.is<const OMPExecutableDirective *>() && 8439 "Expect a executable directive"); 8440 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8441 // Map other list items in the map clause which are not captured variables 8442 // but "declare target link" global variables. 8443 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8444 for (const auto &L : C->component_lists()) { 8445 if (!L.first) 8446 continue; 8447 const auto *VD = dyn_cast<VarDecl>(L.first); 8448 if (!VD) 8449 continue; 8450 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8451 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8452 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8453 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8454 continue; 8455 StructRangeInfoTy PartialStruct; 8456 generateInfoForComponentList( 8457 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8458 Pointers, Sizes, Types, PartialStruct, 8459 /*IsFirstComponentList=*/true, C->isImplicit()); 8460 assert(!PartialStruct.Base.isValid() && 8461 "No partial structs for declare target link expected."); 8462 } 8463 } 8464 } 8465 8466 /// Generate the default map information for a given capture \a CI, 8467 /// record field declaration \a RI and captured value \a CV. 8468 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8469 const FieldDecl &RI, llvm::Value *CV, 8470 MapBaseValuesArrayTy &CurBasePointers, 8471 MapValuesArrayTy &CurPointers, 8472 MapValuesArrayTy &CurSizes, 8473 MapFlagsArrayTy &CurMapTypes) const { 8474 bool IsImplicit = true; 8475 // Do the default mapping. 
8476 if (CI.capturesThis()) { 8477 CurBasePointers.push_back(CV); 8478 CurPointers.push_back(CV); 8479 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8480 CurSizes.push_back( 8481 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8482 CGF.Int64Ty, /*isSigned=*/true)); 8483 // Default map type. 8484 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8485 } else if (CI.capturesVariableByCopy()) { 8486 CurBasePointers.push_back(CV); 8487 CurPointers.push_back(CV); 8488 if (!RI.getType()->isAnyPointerType()) { 8489 // We have to signal to the runtime captures passed by value that are 8490 // not pointers. 8491 CurMapTypes.push_back(OMP_MAP_LITERAL); 8492 CurSizes.push_back(CGF.Builder.CreateIntCast( 8493 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8494 } else { 8495 // Pointers are implicitly mapped with a zero size and no flags 8496 // (other than first map that is added for all implicit maps). 8497 CurMapTypes.push_back(OMP_MAP_NONE); 8498 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8499 } 8500 const VarDecl *VD = CI.getCapturedVar(); 8501 auto I = FirstPrivateDecls.find(VD); 8502 if (I != FirstPrivateDecls.end()) 8503 IsImplicit = I->getSecond(); 8504 } else { 8505 assert(CI.capturesVariable() && "Expected captured reference."); 8506 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8507 QualType ElementType = PtrTy->getPointeeType(); 8508 CurSizes.push_back(CGF.Builder.CreateIntCast( 8509 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8510 // The default map type for a scalar/complex type is 'to' because by 8511 // default the value doesn't have to be retrieved. For an aggregate 8512 // type, the default is 'tofrom'. 
8513 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8514 const VarDecl *VD = CI.getCapturedVar(); 8515 auto I = FirstPrivateDecls.find(VD); 8516 if (I != FirstPrivateDecls.end() && 8517 VD->getType().isConstant(CGF.getContext())) { 8518 llvm::Constant *Addr = 8519 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8520 // Copy the value of the original variable to the new global copy. 8521 CGF.Builder.CreateMemCpy( 8522 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), 8523 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8524 CurSizes.back(), /*IsVolatile=*/false); 8525 // Use new global variable as the base pointers. 8526 CurBasePointers.push_back(Addr); 8527 CurPointers.push_back(Addr); 8528 } else { 8529 CurBasePointers.push_back(CV); 8530 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8531 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8532 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8533 AlignmentSource::Decl)); 8534 CurPointers.push_back(PtrAddr.getPointer()); 8535 } else { 8536 CurPointers.push_back(CV); 8537 } 8538 } 8539 if (I != FirstPrivateDecls.end()) 8540 IsImplicit = I->getSecond(); 8541 } 8542 // Every default map produces a single argument which is a target parameter. 8543 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8544 8545 // Add flag stating this is an implicit map. 8546 if (IsImplicit) 8547 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8548 } 8549 }; 8550 } // anonymous namespace 8551 8552 /// Emit the arrays used to pass the captures and map information to the 8553 /// offloading runtime library. If there is no map or capture information, 8554 /// return nullptr by reference. 
8555 static void 8556 emitOffloadingArrays(CodeGenFunction &CGF, 8557 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8558 MappableExprsHandler::MapValuesArrayTy &Pointers, 8559 MappableExprsHandler::MapValuesArrayTy &Sizes, 8560 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8561 CGOpenMPRuntime::TargetDataInfo &Info) { 8562 CodeGenModule &CGM = CGF.CGM; 8563 ASTContext &Ctx = CGF.getContext(); 8564 8565 // Reset the array information. 8566 Info.clearArrayInfo(); 8567 Info.NumberOfPtrs = BasePointers.size(); 8568 8569 if (Info.NumberOfPtrs) { 8570 // Detect if we have any capture size requiring runtime evaluation of the 8571 // size so that a constant array could be eventually used. 8572 bool hasRuntimeEvaluationCaptureSize = false; 8573 for (llvm::Value *S : Sizes) 8574 if (!isa<llvm::Constant>(S)) { 8575 hasRuntimeEvaluationCaptureSize = true; 8576 break; 8577 } 8578 8579 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8580 QualType PointerArrayType = Ctx.getConstantArrayType( 8581 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8582 /*IndexTypeQuals=*/0); 8583 8584 Info.BasePointersArray = 8585 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8586 Info.PointersArray = 8587 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8588 8589 // If we don't have any VLA types or other types that require runtime 8590 // evaluation, we can use a constant array for the map sizes, otherwise we 8591 // need to fill up the arrays as we do for the pointers. 
8592 QualType Int64Ty = 8593 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8594 if (hasRuntimeEvaluationCaptureSize) { 8595 QualType SizeArrayType = Ctx.getConstantArrayType( 8596 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8597 /*IndexTypeQuals=*/0); 8598 Info.SizesArray = 8599 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8600 } else { 8601 // We expect all the sizes to be constant, so we collect them to create 8602 // a constant array. 8603 SmallVector<llvm::Constant *, 16> ConstSizes; 8604 for (llvm::Value *S : Sizes) 8605 ConstSizes.push_back(cast<llvm::Constant>(S)); 8606 8607 auto *SizesArrayInit = llvm::ConstantArray::get( 8608 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8609 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8610 auto *SizesArrayGbl = new llvm::GlobalVariable( 8611 CGM.getModule(), SizesArrayInit->getType(), 8612 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8613 SizesArrayInit, Name); 8614 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8615 Info.SizesArray = SizesArrayGbl; 8616 } 8617 8618 // The map types are always constant so we don't need to generate code to 8619 // fill arrays. Instead, we create an array constant. 
8620 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8621 llvm::copy(MapTypes, Mapping.begin()); 8622 llvm::Constant *MapTypesArrayInit = 8623 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8624 std::string MaptypesName = 8625 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8626 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8627 CGM.getModule(), MapTypesArrayInit->getType(), 8628 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8629 MapTypesArrayInit, MaptypesName); 8630 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8631 Info.MapTypesArray = MapTypesArrayGbl; 8632 8633 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8634 llvm::Value *BPVal = *BasePointers[I]; 8635 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8636 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8637 Info.BasePointersArray, 0, I); 8638 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8639 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8640 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8641 CGF.Builder.CreateStore(BPVal, BPAddr); 8642 8643 if (Info.requiresDevicePointerInfo()) 8644 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8645 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8646 8647 llvm::Value *PVal = Pointers[I]; 8648 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8649 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8650 Info.PointersArray, 0, I); 8651 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8652 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8653 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8654 CGF.Builder.CreateStore(PVal, PAddr); 8655 8656 if (hasRuntimeEvaluationCaptureSize) { 8657 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8658 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8659 Info.SizesArray, 8660 /*Idx0=*/0, 8661 /*Idx1=*/I); 8662 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8663 CGF.Builder.CreateStore( 8664 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8665 SAddr); 8666 } 8667 } 8668 } 8669 } 8670 8671 /// Emit the arguments to be passed to the runtime library based on the 8672 /// arrays of pointers, sizes and map types. 8673 static void emitOffloadingArraysArgument( 8674 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8675 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8676 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8677 CodeGenModule &CGM = CGF.CGM; 8678 if (Info.NumberOfPtrs) { 8679 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8680 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8681 Info.BasePointersArray, 8682 /*Idx0=*/0, /*Idx1=*/0); 8683 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8684 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8685 Info.PointersArray, 8686 /*Idx0=*/0, 8687 /*Idx1=*/0); 8688 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8689 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8690 /*Idx0=*/0, /*Idx1=*/0); 8691 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8692 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8693 Info.MapTypesArray, 8694 /*Idx0=*/0, 8695 /*Idx1=*/0); 8696 } else { 8697 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8698 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8699 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8700 MapTypesArrayArg = 8701 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8702 } 8703 } 8704 8705 /// Check for inner distribute directive. 
8706 static const OMPExecutableDirective * 8707 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8708 const auto *CS = D.getInnermostCapturedStmt(); 8709 const auto *Body = 8710 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8711 const Stmt *ChildStmt = 8712 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8713 8714 if (const auto *NestedDir = 8715 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8716 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8717 switch (D.getDirectiveKind()) { 8718 case OMPD_target: 8719 if (isOpenMPDistributeDirective(DKind)) 8720 return NestedDir; 8721 if (DKind == OMPD_teams) { 8722 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8723 /*IgnoreCaptured=*/true); 8724 if (!Body) 8725 return nullptr; 8726 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8727 if (const auto *NND = 8728 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8729 DKind = NND->getDirectiveKind(); 8730 if (isOpenMPDistributeDirective(DKind)) 8731 return NND; 8732 } 8733 } 8734 return nullptr; 8735 case OMPD_target_teams: 8736 if (isOpenMPDistributeDirective(DKind)) 8737 return NestedDir; 8738 return nullptr; 8739 case OMPD_target_parallel: 8740 case OMPD_target_simd: 8741 case OMPD_target_parallel_for: 8742 case OMPD_target_parallel_for_simd: 8743 return nullptr; 8744 case OMPD_target_teams_distribute: 8745 case OMPD_target_teams_distribute_simd: 8746 case OMPD_target_teams_distribute_parallel_for: 8747 case OMPD_target_teams_distribute_parallel_for_simd: 8748 case OMPD_parallel: 8749 case OMPD_for: 8750 case OMPD_parallel_for: 8751 case OMPD_parallel_sections: 8752 case OMPD_for_simd: 8753 case OMPD_parallel_for_simd: 8754 case OMPD_cancel: 8755 case OMPD_cancellation_point: 8756 case OMPD_ordered: 8757 case OMPD_threadprivate: 8758 case OMPD_allocate: 8759 case OMPD_task: 8760 case OMPD_simd: 8761 case OMPD_sections: 8762 case OMPD_section: 8763 case 
OMPD_single: 8764 case OMPD_master: 8765 case OMPD_critical: 8766 case OMPD_taskyield: 8767 case OMPD_barrier: 8768 case OMPD_taskwait: 8769 case OMPD_taskgroup: 8770 case OMPD_atomic: 8771 case OMPD_flush: 8772 case OMPD_teams: 8773 case OMPD_target_data: 8774 case OMPD_target_exit_data: 8775 case OMPD_target_enter_data: 8776 case OMPD_distribute: 8777 case OMPD_distribute_simd: 8778 case OMPD_distribute_parallel_for: 8779 case OMPD_distribute_parallel_for_simd: 8780 case OMPD_teams_distribute: 8781 case OMPD_teams_distribute_simd: 8782 case OMPD_teams_distribute_parallel_for: 8783 case OMPD_teams_distribute_parallel_for_simd: 8784 case OMPD_target_update: 8785 case OMPD_declare_simd: 8786 case OMPD_declare_variant: 8787 case OMPD_declare_target: 8788 case OMPD_end_declare_target: 8789 case OMPD_declare_reduction: 8790 case OMPD_declare_mapper: 8791 case OMPD_taskloop: 8792 case OMPD_taskloop_simd: 8793 case OMPD_master_taskloop: 8794 case OMPD_master_taskloop_simd: 8795 case OMPD_parallel_master_taskloop: 8796 case OMPD_parallel_master_taskloop_simd: 8797 case OMPD_requires: 8798 case OMPD_unknown: 8799 llvm_unreachable("Unexpected directive."); 8800 } 8801 } 8802 8803 return nullptr; 8804 } 8805 8806 /// Emit the user-defined mapper function. The code generation follows the 8807 /// pattern in the example below. 8808 /// \code 8809 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8810 /// void *base, void *begin, 8811 /// int64_t size, int64_t type) { 8812 /// // Allocate space for an array section first. 8813 /// if (size > 1 && !maptype.IsDelete) 8814 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8815 /// size*sizeof(Ty), clearToFrom(type)); 8816 /// // Map members. 
8817 /// for (unsigned i = 0; i < size; i++) { 8818 /// // For each component specified by this mapper: 8819 /// for (auto c : all_components) { 8820 /// if (c.hasMapper()) 8821 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8822 /// c.arg_type); 8823 /// else 8824 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8825 /// c.arg_begin, c.arg_size, c.arg_type); 8826 /// } 8827 /// } 8828 /// // Delete the array section. 8829 /// if (size > 1 && maptype.IsDelete) 8830 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8831 /// size*sizeof(Ty), clearToFrom(type)); 8832 /// } 8833 /// \endcode 8834 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8835 CodeGenFunction *CGF) { 8836 if (UDMMap.count(D) > 0) 8837 return; 8838 ASTContext &C = CGM.getContext(); 8839 QualType Ty = D->getType(); 8840 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8841 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8842 auto *MapperVarDecl = 8843 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8844 SourceLocation Loc = D->getLocation(); 8845 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8846 8847 // Prepare mapper function arguments and attributes. 
8848 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8849 C.VoidPtrTy, ImplicitParamDecl::Other); 8850 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 8851 ImplicitParamDecl::Other); 8852 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8853 C.VoidPtrTy, ImplicitParamDecl::Other); 8854 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8855 ImplicitParamDecl::Other); 8856 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8857 ImplicitParamDecl::Other); 8858 FunctionArgList Args; 8859 Args.push_back(&HandleArg); 8860 Args.push_back(&BaseArg); 8861 Args.push_back(&BeginArg); 8862 Args.push_back(&SizeArg); 8863 Args.push_back(&TypeArg); 8864 const CGFunctionInfo &FnInfo = 8865 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 8866 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 8867 SmallString<64> TyStr; 8868 llvm::raw_svector_ostream Out(TyStr); 8869 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 8870 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 8871 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 8872 Name, &CGM.getModule()); 8873 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 8874 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 8875 // Start the mapper function code generation. 8876 CodeGenFunction MapperCGF(CGM); 8877 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 8878 // Compute the starting and end addreses of array elements. 
8879 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 8880 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 8881 C.getPointerType(Int64Ty), Loc); 8882 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 8883 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 8884 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 8885 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 8886 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 8887 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 8888 C.getPointerType(Int64Ty), Loc); 8889 // Prepare common arguments for array initiation and deletion. 8890 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 8891 MapperCGF.GetAddrOfLocalVar(&HandleArg), 8892 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8893 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 8894 MapperCGF.GetAddrOfLocalVar(&BaseArg), 8895 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8896 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 8897 MapperCGF.GetAddrOfLocalVar(&BeginArg), 8898 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8899 8900 // Emit array initiation if this is an array section and \p MapType indicates 8901 // that memory allocation is required. 8902 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 8903 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 8904 ElementSize, HeadBB, /*IsInit=*/true); 8905 8906 // Emit a for loop to iterate through SizeArg of elements and map all of them. 8907 8908 // Emit the loop header block. 8909 MapperCGF.EmitBlock(HeadBB); 8910 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 8911 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 8912 // Evaluate whether the initial condition is satisfied. 
8913 llvm::Value *IsEmpty = 8914 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 8915 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 8916 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 8917 8918 // Emit the loop body block. 8919 MapperCGF.EmitBlock(BodyBB); 8920 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 8921 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 8922 PtrPHI->addIncoming(PtrBegin, EntryBB); 8923 Address PtrCurrent = 8924 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 8925 .getAlignment() 8926 .alignmentOfArrayElement(ElementSize)); 8927 // Privatize the declared variable of mapper to be the current array element. 8928 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 8929 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 8930 return MapperCGF 8931 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 8932 .getAddress(); 8933 }); 8934 (void)Scope.Privatize(); 8935 8936 // Get map clause information. Fill up the arrays with all mapped variables. 8937 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8938 MappableExprsHandler::MapValuesArrayTy Pointers; 8939 MappableExprsHandler::MapValuesArrayTy Sizes; 8940 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8941 MappableExprsHandler MEHandler(*D, MapperCGF); 8942 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 8943 8944 // Call the runtime API __tgt_mapper_num_components to get the number of 8945 // pre-existing components. 8946 llvm::Value *OffloadingArgs[] = {Handle}; 8947 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 8948 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 8949 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 8950 PreviousSize, 8951 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 8952 8953 // Fill up the runtime mapper handle for all components. 
8954 for (unsigned I = 0; I < BasePointers.size(); ++I) { 8955 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 8956 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8957 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 8958 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8959 llvm::Value *CurSizeArg = Sizes[I]; 8960 8961 // Extract the MEMBER_OF field from the map type. 8962 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 8963 MapperCGF.EmitBlock(MemberBB); 8964 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 8965 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 8966 OriMapType, 8967 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 8968 llvm::BasicBlock *MemberCombineBB = 8969 MapperCGF.createBasicBlock("omp.member.combine"); 8970 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 8971 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 8972 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 8973 // Add the number of pre-existing components to the MEMBER_OF field if it 8974 // is valid. 8975 MapperCGF.EmitBlock(MemberCombineBB); 8976 llvm::Value *CombinedMember = 8977 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 8978 // Do nothing if it is not a member of previous components. 8979 MapperCGF.EmitBlock(TypeBB); 8980 llvm::PHINode *MemberMapType = 8981 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 8982 MemberMapType->addIncoming(OriMapType, MemberBB); 8983 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 8984 8985 // Combine the map type inherited from user-defined mapper with that 8986 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 8987 // bits of the \a MapType, which is the input argument of the mapper 8988 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 8989 // bits of MemberMapType. 
8990 // [OpenMP 5.0], 1.2.6. map-type decay. 8991 // | alloc | to | from | tofrom | release | delete 8992 // ---------------------------------------------------------- 8993 // alloc | alloc | alloc | alloc | alloc | release | delete 8994 // to | alloc | to | alloc | to | release | delete 8995 // from | alloc | alloc | from | from | release | delete 8996 // tofrom | alloc | to | from | tofrom | release | delete 8997 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 8998 MapType, 8999 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9000 MappableExprsHandler::OMP_MAP_FROM)); 9001 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9002 llvm::BasicBlock *AllocElseBB = 9003 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9004 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9005 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9006 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9007 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9008 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9009 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9010 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9011 MapperCGF.EmitBlock(AllocBB); 9012 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9013 MemberMapType, 9014 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9015 MappableExprsHandler::OMP_MAP_FROM))); 9016 MapperCGF.Builder.CreateBr(EndBB); 9017 MapperCGF.EmitBlock(AllocElseBB); 9018 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9019 LeftToFrom, 9020 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9021 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9022 // In case of to, clear OMP_MAP_FROM. 
9023 MapperCGF.EmitBlock(ToBB); 9024 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9025 MemberMapType, 9026 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9027 MapperCGF.Builder.CreateBr(EndBB); 9028 MapperCGF.EmitBlock(ToElseBB); 9029 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9030 LeftToFrom, 9031 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9032 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9033 // In case of from, clear OMP_MAP_TO. 9034 MapperCGF.EmitBlock(FromBB); 9035 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9036 MemberMapType, 9037 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9038 // In case of tofrom, do nothing. 9039 MapperCGF.EmitBlock(EndBB); 9040 llvm::PHINode *CurMapType = 9041 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9042 CurMapType->addIncoming(AllocMapType, AllocBB); 9043 CurMapType->addIncoming(ToMapType, ToBB); 9044 CurMapType->addIncoming(FromMapType, FromBB); 9045 CurMapType->addIncoming(MemberMapType, ToElseBB); 9046 9047 // TODO: call the corresponding mapper function if a user-defined mapper is 9048 // associated with this map clause. 9049 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9050 // data structure. 9051 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9052 CurSizeArg, CurMapType}; 9053 MapperCGF.EmitRuntimeCall( 9054 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9055 OffloadingArgs); 9056 } 9057 9058 // Update the pointer to point to the next element that needs to be mapped, 9059 // and check whether we have mapped all elements. 
9060 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9061 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9062 PtrPHI->addIncoming(PtrNext, BodyBB); 9063 llvm::Value *IsDone = 9064 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9065 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9066 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9067 9068 MapperCGF.EmitBlock(ExitBB); 9069 // Emit array deletion if this is an array section and \p MapType indicates 9070 // that deletion is required. 9071 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9072 ElementSize, DoneBB, /*IsInit=*/false); 9073 9074 // Emit the function exit block. 9075 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9076 MapperCGF.FinishFunction(); 9077 UDMMap.try_emplace(D, Fn); 9078 if (CGF) { 9079 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9080 Decls.second.push_back(D); 9081 } 9082 } 9083 9084 /// Emit the array initialization or deletion portion for user-defined mapper 9085 /// code generation. First, it evaluates whether an array section is mapped and 9086 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9087 /// true, and \a MapType indicates to not delete this array, array 9088 /// initialization code is generated. If \a IsInit is false, and \a MapType 9089 /// indicates to not this array, array deletion code is generated. 9090 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9091 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9092 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9093 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9094 StringRef Prefix = IsInit ? ".init" : ".del"; 9095 9096 // Evaluate if this is an array section. 
9097 llvm::BasicBlock *IsDeleteBB = 9098 MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); 9099 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); 9100 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9101 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9102 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9103 9104 // Evaluate if we are going to delete this section. 9105 MapperCGF.EmitBlock(IsDeleteBB); 9106 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9107 MapType, 9108 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9109 llvm::Value *DeleteCond; 9110 if (IsInit) { 9111 DeleteCond = MapperCGF.Builder.CreateIsNull( 9112 DeleteBit, "omp.array" + Prefix + ".delete"); 9113 } else { 9114 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9115 DeleteBit, "omp.array" + Prefix + ".delete"); 9116 } 9117 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9118 9119 MapperCGF.EmitBlock(BodyBB); 9120 // Get the array size by multiplying element size and element number (i.e., \p 9121 // Size). 9122 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9123 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9124 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9125 // memory allocation/deletion purpose only. 9126 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9127 MapType, 9128 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9129 MappableExprsHandler::OMP_MAP_FROM))); 9130 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9131 // data structure. 
9132 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9133 MapperCGF.EmitRuntimeCall( 9134 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9135 } 9136 9137 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9138 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9139 llvm::Value *DeviceID, 9140 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9141 const OMPLoopDirective &D)> 9142 SizeEmitter) { 9143 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9144 const OMPExecutableDirective *TD = &D; 9145 // Get nested teams distribute kind directive, if any. 9146 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9147 TD = getNestedDistributeDirective(CGM.getContext(), D); 9148 if (!TD) 9149 return; 9150 const auto *LD = cast<OMPLoopDirective>(TD); 9151 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9152 PrePostActionTy &) { 9153 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9154 llvm::Value *Args[] = {DeviceID, NumIterations}; 9155 CGF.EmitRuntimeCall( 9156 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9157 } 9158 }; 9159 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9160 } 9161 9162 void CGOpenMPRuntime::emitTargetCall( 9163 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9164 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9165 const Expr *Device, 9166 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9167 const OMPLoopDirective &D)> 9168 SizeEmitter) { 9169 if (!CGF.HaveInsertPoint()) 9170 return; 9171 9172 assert(OutlinedFn && "Invalid outlined function!"); 9173 9174 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9175 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9176 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9177 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9178 PrePostActionTy &) { 9179 
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // InputInfo and MapTypesArray are filled in by TargetThenGen below and read
  // by ThenGen, which captures both by reference.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Emit tripcount for the target loop-based directive.
    emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);

    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    // The target region is an outlined function launched by the runtime
    // via calls __tgt_target() or __tgt_target_teams().
    //
    // __tgt_target() launches a target region with one team and one thread,
    // executing a serial region.  This master thread may in turn launch
    // more threads within its team upon encountering a parallel region,
    // however, no additional teams can be launched on the device.
    //
    // __tgt_target_teams() launches a target region with one or more teams,
    // each with one or more threads.  This call is required for target
    // constructs such as:
    //  'target teams'
    //  'target' / 'teams'
    //  'target teams distribute parallel for'
    //  'target parallel'
    // and so on.
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    if (NumTeams) {
      // If we have NumTeams defined this means that we have an enclosed teams
      // region. Therefore we also expect to have NumThreads defined. These two
      // values should be defined in the presence of a teams directive,
      // regardless of having any clauses associated. If the user is using teams
      // but no clauses, these two values will be the default that should be
      // passed to the runtime library - a 32-bit integer with the value zero.
      assert(NumThreads && "Thread limit expression should be available along "
                           "with number of teams.");
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray,
                                       NumTeams,
                                       NumThreads};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
                                          : OMPRTL__tgt_target_teams),
          OffloadingArgs);
    } else {
      llvm::Value *OffloadingArgs[] = {DeviceID,
                                       OutlinedFnID,
                                       PointerNum,
                                       InputInfo.BasePointersArray.getPointer(),
                                       InputInfo.PointersArray.getPointer(),
                                       InputInfo.SizesArray.getPointer(),
                                       MapTypesArray};
      Return = CGF.EmitRuntimeCall(
          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
                                          : OMPRTL__tgt_target),
          OffloadingArgs);
    }

    // Check the error code and execute the host version if required.
    // A non-null return value is treated as an offloading failure.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    // With an outer task the captured variables are regenerated using the CGF
    // this lambda runs in before they are passed to the host fallback call.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
                    RequiresOuterTask](CodeGenFunction &CGF,
                                       PrePostActionTy &) {
    // Same regeneration of captured variables as on the offload-failed path.
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
  };

  // Offloading path: collect the map information for every capture, emit the
  // offloading arrays, then run ThenGen either inside a task-based directive
  // (when RequiresOuterTask is set) or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;

    // Walk the captures, the captured record's fields and the captured values
    // in lock step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
      MappableExprsHandler::MapValuesArrayTy CurPointers;
      MappableExprsHandler::MapValuesArrayTy CurSizes;
      MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurBasePointers.push_back(*CV);
        CurPointers.push_back(*CV);
        CurSizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                              MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                              MappableExprsHandler::OMP_MAP_IMPLICIT);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
                                         CurSizes, CurMapTypes, PartialStruct);
        if (CurBasePointers.empty())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
                                           CurPointers, CurSizes, CurMapTypes);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(
              CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
              CurMapTypes, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert(!CurBasePointers.empty() &&
             "Non-existing map pointer for capture!");
      assert(CurBasePointers.size() == CurPointers.size() &&
             CurBasePointers.size() == CurSizes.size() &&
             CurBasePointers.size() == CurMapTypes.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid())
        MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
                                    CurMapTypes, PartialStruct);

      // We need to append the results of this capture to what we already have.
      BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
      Pointers.append(CurPointers.begin(), CurPointers.end());
      Sizes.append(CurSizes.begin(), CurSizes.end());
      MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
                                              Pointers, MapTypes);
    // Map other list items in the map clause which are not captured variables
    // but "declare target link" global variables.
    MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
                                               MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
    // Info's own array fields are passed as the output arguments, so they are
    // overwritten in place with the values produced by the call.
    emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
                                 Info.PointersArray, Info.SizesArray,
                                 Info.MapTypesArray, Info);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGM.getPointerAlign());
    InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  // Host-only path: run ElseGen inside a task-based directive (with an empty
  // OMPTargetDataInfo) when RequiresOuterTask is set, inlined otherwise.
  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);
    unsigned DeviceID;
    unsigned FileID;
    unsigned Line;
    getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
                             FileID, Line);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
9461 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9462 ParentName, Line)) 9463 return; 9464 9465 switch (E.getDirectiveKind()) { 9466 case OMPD_target: 9467 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9468 cast<OMPTargetDirective>(E)); 9469 break; 9470 case OMPD_target_parallel: 9471 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9472 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9473 break; 9474 case OMPD_target_teams: 9475 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9476 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9477 break; 9478 case OMPD_target_teams_distribute: 9479 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9480 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9481 break; 9482 case OMPD_target_teams_distribute_simd: 9483 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9484 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9485 break; 9486 case OMPD_target_parallel_for: 9487 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9488 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9489 break; 9490 case OMPD_target_parallel_for_simd: 9491 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9492 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9493 break; 9494 case OMPD_target_simd: 9495 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9496 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9497 break; 9498 case OMPD_target_teams_distribute_parallel_for: 9499 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9500 CGM, ParentName, 9501 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9502 break; 9503 case OMPD_target_teams_distribute_parallel_for_simd: 9504 CodeGenFunction:: 9505 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9506 CGM, ParentName, 9507 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9508 break; 9509 case OMPD_parallel: 
9510 case OMPD_for: 9511 case OMPD_parallel_for: 9512 case OMPD_parallel_sections: 9513 case OMPD_for_simd: 9514 case OMPD_parallel_for_simd: 9515 case OMPD_cancel: 9516 case OMPD_cancellation_point: 9517 case OMPD_ordered: 9518 case OMPD_threadprivate: 9519 case OMPD_allocate: 9520 case OMPD_task: 9521 case OMPD_simd: 9522 case OMPD_sections: 9523 case OMPD_section: 9524 case OMPD_single: 9525 case OMPD_master: 9526 case OMPD_critical: 9527 case OMPD_taskyield: 9528 case OMPD_barrier: 9529 case OMPD_taskwait: 9530 case OMPD_taskgroup: 9531 case OMPD_atomic: 9532 case OMPD_flush: 9533 case OMPD_teams: 9534 case OMPD_target_data: 9535 case OMPD_target_exit_data: 9536 case OMPD_target_enter_data: 9537 case OMPD_distribute: 9538 case OMPD_distribute_simd: 9539 case OMPD_distribute_parallel_for: 9540 case OMPD_distribute_parallel_for_simd: 9541 case OMPD_teams_distribute: 9542 case OMPD_teams_distribute_simd: 9543 case OMPD_teams_distribute_parallel_for: 9544 case OMPD_teams_distribute_parallel_for_simd: 9545 case OMPD_target_update: 9546 case OMPD_declare_simd: 9547 case OMPD_declare_variant: 9548 case OMPD_declare_target: 9549 case OMPD_end_declare_target: 9550 case OMPD_declare_reduction: 9551 case OMPD_declare_mapper: 9552 case OMPD_taskloop: 9553 case OMPD_taskloop_simd: 9554 case OMPD_master_taskloop: 9555 case OMPD_master_taskloop_simd: 9556 case OMPD_parallel_master_taskloop: 9557 case OMPD_parallel_master_taskloop_simd: 9558 case OMPD_requires: 9559 case OMPD_unknown: 9560 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9561 } 9562 return; 9563 } 9564 9565 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9566 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9567 return; 9568 9569 scanForTargetRegionsFunctions( 9570 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9571 return; 9572 } 9573 9574 // If this is a lambda function, look into its body. 
9575 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9576 S = L->getBody(); 9577 9578 // Keep looking for target regions recursively. 9579 for (const Stmt *II : S->children()) 9580 scanForTargetRegionsFunctions(II, ParentName); 9581 } 9582 9583 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9584 // If emitting code for the host, we do not process FD here. Instead we do 9585 // the normal code generation. 9586 if (!CGM.getLangOpts().OpenMPIsDevice) { 9587 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9588 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9589 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9590 // Do not emit device_type(nohost) functions for the host. 9591 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9592 return true; 9593 } 9594 return false; 9595 } 9596 9597 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9598 StringRef Name = CGM.getMangledName(GD); 9599 // Try to detect target regions in the function. 9600 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9601 scanForTargetRegionsFunctions(FD->getBody(), Name); 9602 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9603 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9604 // Do not emit device_type(nohost) functions for the host. 9605 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9606 return true; 9607 } 9608 9609 // Do not to emit function if it is not marked as declare target. 9610 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9611 AlreadyEmittedTargetFunctions.count(Name) == 0; 9612 } 9613 9614 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9615 if (!CGM.getLangOpts().OpenMPIsDevice) 9616 return false; 9617 9618 // Check if there are Ctors/Dtors in this declaration and look for target 9619 // regions in it. We use the complete variant to produce the kernel name 9620 // mangling. 
9621 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9622 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9623 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9624 StringRef ParentName = 9625 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9626 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9627 } 9628 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9629 StringRef ParentName = 9630 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9631 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9632 } 9633 } 9634 9635 // Do not to emit variable if it is not marked as declare target. 9636 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9637 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9638 cast<VarDecl>(GD.getDecl())); 9639 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9640 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9641 HasRequiresUnifiedSharedMemory)) { 9642 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9643 return true; 9644 } 9645 return false; 9646 } 9647 9648 llvm::Constant * 9649 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9650 const VarDecl *VD) { 9651 assert(VD->getType().isConstant(CGM.getContext()) && 9652 "Expected constant variable."); 9653 StringRef VarName; 9654 llvm::Constant *Addr; 9655 llvm::GlobalValue::LinkageTypes Linkage; 9656 QualType Ty = VD->getType(); 9657 SmallString<128> Buffer; 9658 { 9659 unsigned DeviceID; 9660 unsigned FileID; 9661 unsigned Line; 9662 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9663 FileID, Line); 9664 llvm::raw_svector_ostream OS(Buffer); 9665 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9666 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9667 VarName = OS.str(); 9668 } 9669 Linkage = llvm::GlobalValue::InternalLinkage; 9670 Addr = 9671 
getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9672 getDefaultFirstprivateAddressSpace()); 9673 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9674 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9675 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9676 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9677 VarName, Addr, VarSize, 9678 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9679 return Addr; 9680 } 9681 9682 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9683 llvm::Constant *Addr) { 9684 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9685 !CGM.getLangOpts().OpenMPIsDevice) 9686 return; 9687 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9688 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9689 if (!Res) { 9690 if (CGM.getLangOpts().OpenMPIsDevice) { 9691 // Register non-target variables being emitted in device code (debug info 9692 // may cause this). 9693 StringRef VarName = CGM.getMangledName(VD); 9694 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9695 } 9696 return; 9697 } 9698 // Register declare target variables. 9699 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9700 StringRef VarName; 9701 CharUnits VarSize; 9702 llvm::GlobalValue::LinkageTypes Linkage; 9703 9704 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9705 !HasRequiresUnifiedSharedMemory) { 9706 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9707 VarName = CGM.getMangledName(VD); 9708 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9709 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9710 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9711 } else { 9712 VarSize = CharUnits::Zero(); 9713 } 9714 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9715 // Temp solution to prevent optimizations of the internal variables. 
9716 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9717 std::string RefName = getName({VarName, "ref"}); 9718 if (!CGM.GetGlobalValue(RefName)) { 9719 llvm::Constant *AddrRef = 9720 getOrCreateInternalVariable(Addr->getType(), RefName); 9721 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9722 GVAddrRef->setConstant(/*Val=*/true); 9723 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9724 GVAddrRef->setInitializer(Addr); 9725 CGM.addCompilerUsedGlobal(GVAddrRef); 9726 } 9727 } 9728 } else { 9729 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9730 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9731 HasRequiresUnifiedSharedMemory)) && 9732 "Declare target attribute must link or to with unified memory."); 9733 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9734 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9735 else 9736 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9737 9738 if (CGM.getLangOpts().OpenMPIsDevice) { 9739 VarName = Addr->getName(); 9740 Addr = nullptr; 9741 } else { 9742 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9743 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9744 } 9745 VarSize = CGM.getPointerSize(); 9746 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9747 } 9748 9749 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9750 VarName, Addr, VarSize, Flags, Linkage); 9751 } 9752 9753 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9754 if (isa<FunctionDecl>(GD.getDecl()) || 9755 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9756 return emitTargetFunctions(GD); 9757 9758 return emitTargetGlobalVariable(GD); 9759 } 9760 9761 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9762 for (const VarDecl *VD : DeferredGlobalVariables) { 9763 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9764 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9765 if (!Res) 9766 continue; 9767 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9768 !HasRequiresUnifiedSharedMemory) { 9769 CGM.EmitGlobal(VD); 9770 } else { 9771 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9772 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9773 HasRequiresUnifiedSharedMemory)) && 9774 "Expected link clause or to clause with unified memory."); 9775 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9776 } 9777 } 9778 } 9779 9780 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9781 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9782 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9783 " Expected target-based directive."); 9784 } 9785 9786 void CGOpenMPRuntime::checkArchForUnifiedAddressing( 9787 const OMPRequiresDecl *D) { 9788 for (const OMPClause *Clause : D->clauselists()) { 9789 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9790 HasRequiresUnifiedSharedMemory = true; 9791 break; 9792 } 9793 } 9794 } 9795 9796 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9797 LangAS &AS) { 9798 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9799 return false; 9800 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9801 switch(A->getAllocatorType()) { 9802 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9803 // Not supported, fallback to the default mem space. 
9804 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9805 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9806 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9807 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9808 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9809 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9810 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9811 AS = LangAS::Default; 9812 return true; 9813 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9814 llvm_unreachable("Expected predefined allocator for the variables with the " 9815 "static storage."); 9816 } 9817 return false; 9818 } 9819 9820 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9821 return HasRequiresUnifiedSharedMemory; 9822 } 9823 9824 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9825 CodeGenModule &CGM) 9826 : CGM(CGM) { 9827 if (CGM.getLangOpts().OpenMPIsDevice) { 9828 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9829 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9830 } 9831 } 9832 9833 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9834 if (CGM.getLangOpts().OpenMPIsDevice) 9835 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9836 } 9837 9838 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9839 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9840 return true; 9841 9842 StringRef Name = CGM.getMangledName(GD); 9843 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9844 // Do not to emit function if it is marked as declare target as it was already 9845 // emitted. 
9846 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9847 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 9848 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 9849 return !F->isDeclaration(); 9850 return false; 9851 } 9852 return true; 9853 } 9854 9855 return !AlreadyEmittedTargetFunctions.insert(Name).second; 9856 } 9857 9858 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9859 // If we don't have entries or if we are emitting code for the device, we 9860 // don't need to do anything. 9861 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9862 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9863 (OffloadEntriesInfoManager.empty() && 9864 !HasEmittedDeclareTargetRegion && 9865 !HasEmittedTargetRegion)) 9866 return nullptr; 9867 9868 // Create and register the function that handles the requires directives. 9869 ASTContext &C = CGM.getContext(); 9870 9871 llvm::Function *RequiresRegFn; 9872 { 9873 CodeGenFunction CGF(CGM); 9874 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 9875 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 9876 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 9877 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 9878 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 9879 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 9880 // TODO: check for other requires clauses. 9881 // The requires directive takes effect only when a target region is 9882 // present in the compilation unit. Otherwise it is ignored and not 9883 // passed to the runtime. This avoids the runtime from throwing an error 9884 // for mismatching requires clauses across compilation units that don't 9885 // contain at least 1 target region. 
9886 assert((HasEmittedTargetRegion || 9887 HasEmittedDeclareTargetRegion || 9888 !OffloadEntriesInfoManager.empty()) && 9889 "Target or declare target region expected."); 9890 if (HasRequiresUnifiedSharedMemory) 9891 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 9892 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 9893 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 9894 CGF.FinishFunction(); 9895 } 9896 return RequiresRegFn; 9897 } 9898 9899 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9900 const OMPExecutableDirective &D, 9901 SourceLocation Loc, 9902 llvm::Function *OutlinedFn, 9903 ArrayRef<llvm::Value *> CapturedVars) { 9904 if (!CGF.HaveInsertPoint()) 9905 return; 9906 9907 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9908 CodeGenFunction::RunCleanupsScope Scope(CGF); 9909 9910 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9911 llvm::Value *Args[] = { 9912 RTLoc, 9913 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9914 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9915 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9916 RealArgs.append(std::begin(Args), std::end(Args)); 9917 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9918 9919 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9920 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9921 } 9922 9923 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9924 const Expr *NumTeams, 9925 const Expr *ThreadLimit, 9926 SourceLocation Loc) { 9927 if (!CGF.HaveInsertPoint()) 9928 return; 9929 9930 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9931 9932 llvm::Value *NumTeamsVal = 9933 NumTeams 9934 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9935 CGF.CGM.Int32Ty, /* isSigned = */ true) 9936 : CGF.Builder.getInt32(0); 9937 9938 llvm::Value *ThreadLimitVal = 9939 ThreadLimit 9940 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 9941 CGF.CGM.Int32Ty, /* isSigned = */ true) 9942 : CGF.Builder.getInt32(0); 9943 9944 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 9945 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 9946 ThreadLimitVal}; 9947 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 9948 PushNumTeamsArgs); 9949 } 9950 9951 void CGOpenMPRuntime::emitTargetDataCalls( 9952 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9953 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9954 if (!CGF.HaveInsertPoint()) 9955 return; 9956 9957 // Action used to replace the default codegen action and turn privatization 9958 // off. 9959 PrePostActionTy NoPrivAction; 9960 9961 // Generate the code for the opening of the data environment. Capture all the 9962 // arguments of the runtime call by reference because they are used in the 9963 // closing of the region. 9964 auto &&BeginThenGen = [this, &D, Device, &Info, 9965 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 9966 // Fill up the arrays with all the mapped variables. 9967 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9968 MappableExprsHandler::MapValuesArrayTy Pointers; 9969 MappableExprsHandler::MapValuesArrayTy Sizes; 9970 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9971 9972 // Get map clause information. 9973 MappableExprsHandler MCHandler(D, CGF); 9974 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9975 9976 // Fill up the arrays and create the arguments. 
9977 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9978 9979 llvm::Value *BasePointersArrayArg = nullptr; 9980 llvm::Value *PointersArrayArg = nullptr; 9981 llvm::Value *SizesArrayArg = nullptr; 9982 llvm::Value *MapTypesArrayArg = nullptr; 9983 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9984 SizesArrayArg, MapTypesArrayArg, Info); 9985 9986 // Emit device ID if any. 9987 llvm::Value *DeviceID = nullptr; 9988 if (Device) { 9989 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9990 CGF.Int64Ty, /*isSigned=*/true); 9991 } else { 9992 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9993 } 9994 9995 // Emit the number of elements in the offloading arrays. 9996 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 9997 9998 llvm::Value *OffloadingArgs[] = { 9999 DeviceID, PointerNum, BasePointersArrayArg, 10000 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10001 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10002 OffloadingArgs); 10003 10004 // If device pointer privatization is required, emit the body of the region 10005 // here. It will have to be duplicated: with and without privatization. 10006 if (!Info.CaptureDeviceAddrMap.empty()) 10007 CodeGen(CGF); 10008 }; 10009 10010 // Generate code for the closing of the data region. 10011 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10012 PrePostActionTy &) { 10013 assert(Info.isValid() && "Invalid data environment closing arguments."); 10014 10015 llvm::Value *BasePointersArrayArg = nullptr; 10016 llvm::Value *PointersArrayArg = nullptr; 10017 llvm::Value *SizesArrayArg = nullptr; 10018 llvm::Value *MapTypesArrayArg = nullptr; 10019 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10020 SizesArrayArg, MapTypesArrayArg, Info); 10021 10022 // Emit device ID if any. 
10023 llvm::Value *DeviceID = nullptr; 10024 if (Device) { 10025 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10026 CGF.Int64Ty, /*isSigned=*/true); 10027 } else { 10028 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10029 } 10030 10031 // Emit the number of elements in the offloading arrays. 10032 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10033 10034 llvm::Value *OffloadingArgs[] = { 10035 DeviceID, PointerNum, BasePointersArrayArg, 10036 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10037 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10038 OffloadingArgs); 10039 }; 10040 10041 // If we need device pointer privatization, we need to emit the body of the 10042 // region with no privatization in the 'else' branch of the conditional. 10043 // Otherwise, we don't have to do anything. 10044 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10045 PrePostActionTy &) { 10046 if (!Info.CaptureDeviceAddrMap.empty()) { 10047 CodeGen.setAction(NoPrivAction); 10048 CodeGen(CGF); 10049 } 10050 }; 10051 10052 // We don't have to do anything to close the region if the if clause evaluates 10053 // to false. 10054 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10055 10056 if (IfCond) { 10057 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10058 } else { 10059 RegionCodeGenTy RCG(BeginThenGen); 10060 RCG(CGF); 10061 } 10062 10063 // If we don't require privatization of device pointers, we emit the body in 10064 // between the runtime calls. This avoids duplicating the body code. 
10065 if (Info.CaptureDeviceAddrMap.empty()) { 10066 CodeGen.setAction(NoPrivAction); 10067 CodeGen(CGF); 10068 } 10069 10070 if (IfCond) { 10071 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10072 } else { 10073 RegionCodeGenTy RCG(EndThenGen); 10074 RCG(CGF); 10075 } 10076 } 10077 10078 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10079 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10080 const Expr *Device) { 10081 if (!CGF.HaveInsertPoint()) 10082 return; 10083 10084 assert((isa<OMPTargetEnterDataDirective>(D) || 10085 isa<OMPTargetExitDataDirective>(D) || 10086 isa<OMPTargetUpdateDirective>(D)) && 10087 "Expecting either target enter, exit data, or update directives."); 10088 10089 CodeGenFunction::OMPTargetDataInfo InputInfo; 10090 llvm::Value *MapTypesArray = nullptr; 10091 // Generate the code for the opening of the data environment. 10092 auto &&ThenGen = [this, &D, Device, &InputInfo, 10093 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10094 // Emit device ID if any. 10095 llvm::Value *DeviceID = nullptr; 10096 if (Device) { 10097 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10098 CGF.Int64Ty, /*isSigned=*/true); 10099 } else { 10100 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10101 } 10102 10103 // Emit the number of elements in the offloading arrays. 10104 llvm::Constant *PointerNum = 10105 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10106 10107 llvm::Value *OffloadingArgs[] = {DeviceID, 10108 PointerNum, 10109 InputInfo.BasePointersArray.getPointer(), 10110 InputInfo.PointersArray.getPointer(), 10111 InputInfo.SizesArray.getPointer(), 10112 MapTypesArray}; 10113 10114 // Select the right runtime function call for each expected standalone 10115 // directive. 10116 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10117 OpenMPRTLFunction RTLFn; 10118 switch (D.getDirectiveKind()) { 10119 case OMPD_target_enter_data: 10120 RTLFn = HasNowait ? 
OMPRTL__tgt_target_data_begin_nowait 10121 : OMPRTL__tgt_target_data_begin; 10122 break; 10123 case OMPD_target_exit_data: 10124 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10125 : OMPRTL__tgt_target_data_end; 10126 break; 10127 case OMPD_target_update: 10128 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10129 : OMPRTL__tgt_target_data_update; 10130 break; 10131 case OMPD_parallel: 10132 case OMPD_for: 10133 case OMPD_parallel_for: 10134 case OMPD_parallel_sections: 10135 case OMPD_for_simd: 10136 case OMPD_parallel_for_simd: 10137 case OMPD_cancel: 10138 case OMPD_cancellation_point: 10139 case OMPD_ordered: 10140 case OMPD_threadprivate: 10141 case OMPD_allocate: 10142 case OMPD_task: 10143 case OMPD_simd: 10144 case OMPD_sections: 10145 case OMPD_section: 10146 case OMPD_single: 10147 case OMPD_master: 10148 case OMPD_critical: 10149 case OMPD_taskyield: 10150 case OMPD_barrier: 10151 case OMPD_taskwait: 10152 case OMPD_taskgroup: 10153 case OMPD_atomic: 10154 case OMPD_flush: 10155 case OMPD_teams: 10156 case OMPD_target_data: 10157 case OMPD_distribute: 10158 case OMPD_distribute_simd: 10159 case OMPD_distribute_parallel_for: 10160 case OMPD_distribute_parallel_for_simd: 10161 case OMPD_teams_distribute: 10162 case OMPD_teams_distribute_simd: 10163 case OMPD_teams_distribute_parallel_for: 10164 case OMPD_teams_distribute_parallel_for_simd: 10165 case OMPD_declare_simd: 10166 case OMPD_declare_variant: 10167 case OMPD_declare_target: 10168 case OMPD_end_declare_target: 10169 case OMPD_declare_reduction: 10170 case OMPD_declare_mapper: 10171 case OMPD_taskloop: 10172 case OMPD_taskloop_simd: 10173 case OMPD_master_taskloop: 10174 case OMPD_master_taskloop_simd: 10175 case OMPD_parallel_master_taskloop: 10176 case OMPD_parallel_master_taskloop_simd: 10177 case OMPD_target: 10178 case OMPD_target_simd: 10179 case OMPD_target_teams_distribute: 10180 case OMPD_target_teams_distribute_simd: 10181 case OMPD_target_teams_distribute_parallel_for: 
10182 case OMPD_target_teams_distribute_parallel_for_simd: 10183 case OMPD_target_teams: 10184 case OMPD_target_parallel: 10185 case OMPD_target_parallel_for: 10186 case OMPD_target_parallel_for_simd: 10187 case OMPD_requires: 10188 case OMPD_unknown: 10189 llvm_unreachable("Unexpected standalone target data directive."); 10190 break; 10191 } 10192 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10193 }; 10194 10195 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10196 CodeGenFunction &CGF, PrePostActionTy &) { 10197 // Fill up the arrays with all the mapped variables. 10198 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10199 MappableExprsHandler::MapValuesArrayTy Pointers; 10200 MappableExprsHandler::MapValuesArrayTy Sizes; 10201 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10202 10203 // Get map clause information. 10204 MappableExprsHandler MEHandler(D, CGF); 10205 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10206 10207 TargetDataInfo Info; 10208 // Fill up the arrays and create the arguments. 
10209 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10210 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10211 Info.PointersArray, Info.SizesArray, 10212 Info.MapTypesArray, Info); 10213 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10214 InputInfo.BasePointersArray = 10215 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10216 InputInfo.PointersArray = 10217 Address(Info.PointersArray, CGM.getPointerAlign()); 10218 InputInfo.SizesArray = 10219 Address(Info.SizesArray, CGM.getPointerAlign()); 10220 MapTypesArray = Info.MapTypesArray; 10221 if (D.hasClausesOfKind<OMPDependClause>()) 10222 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10223 else 10224 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10225 }; 10226 10227 if (IfCond) { 10228 emitOMPIfClause(CGF, IfCond, TargetThenGen, 10229 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10230 } else { 10231 RegionCodeGenTy ThenRCG(TargetThenGen); 10232 ThenRCG(CGF); 10233 } 10234 } 10235 10236 namespace { 10237 /// Kind of parameter in a function with 'declare simd' directive. 10238 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10239 /// Attribute set of the parameter. 10240 struct ParamAttrTy { 10241 ParamKindTy Kind = Vector; 10242 llvm::APSInt StrideOrArg; 10243 llvm::APSInt Alignment; 10244 }; 10245 } // namespace 10246 10247 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10248 ArrayRef<ParamAttrTy> ParamAttrs) { 10249 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10250 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10251 // of that clause. The VLEN value must be power of 2. 10252 // In other case the notion of the function`s "characteristic data type" (CDT) 10253 // is used to compute the vector length. 10254 // CDT is defined in the following order: 10255 // a) For non-void function, the CDT is the return type. 
10256 // b) If the function has any non-uniform, non-linear parameters, then the 10257 // CDT is the type of the first such parameter. 10258 // c) If the CDT determined by a) or b) above is struct, union, or class 10259 // type which is pass-by-value (except for the type that maps to the 10260 // built-in complex data type), the characteristic data type is int. 10261 // d) If none of the above three cases is applicable, the CDT is int. 10262 // The VLEN is then determined based on the CDT and the size of vector 10263 // register of that ISA for which current vector version is generated. The 10264 // VLEN is computed using the formula below: 10265 // VLEN = sizeof(vector_register) / sizeof(CDT), 10266 // where vector register size specified in section 3.2.1 Registers and the 10267 // Stack Frame of original AMD64 ABI document. 10268 QualType RetType = FD->getReturnType(); 10269 if (RetType.isNull()) 10270 return 0; 10271 ASTContext &C = FD->getASTContext(); 10272 QualType CDT; 10273 if (!RetType.isNull() && !RetType->isVoidType()) { 10274 CDT = RetType; 10275 } else { 10276 unsigned Offset = 0; 10277 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10278 if (ParamAttrs[Offset].Kind == Vector) 10279 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10280 ++Offset; 10281 } 10282 if (CDT.isNull()) { 10283 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10284 if (ParamAttrs[I + Offset].Kind == Vector) { 10285 CDT = FD->getParamDecl(I)->getType(); 10286 break; 10287 } 10288 } 10289 } 10290 } 10291 if (CDT.isNull()) 10292 CDT = C.IntTy; 10293 CDT = CDT->getCanonicalTypeUnqualified(); 10294 if (CDT->isRecordType() || CDT->isUnionType()) 10295 CDT = C.IntTy; 10296 return C.getTypeSize(CDT); 10297 } 10298 10299 static void 10300 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10301 const llvm::APSInt &VLENVal, 10302 ArrayRef<ParamAttrTy> ParamAttrs, 10303 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10304 struct ISADataTy { 
10305 char ISA; 10306 unsigned VecRegSize; 10307 }; 10308 ISADataTy ISAData[] = { 10309 { 10310 'b', 128 10311 }, // SSE 10312 { 10313 'c', 256 10314 }, // AVX 10315 { 10316 'd', 256 10317 }, // AVX2 10318 { 10319 'e', 512 10320 }, // AVX512 10321 }; 10322 llvm::SmallVector<char, 2> Masked; 10323 switch (State) { 10324 case OMPDeclareSimdDeclAttr::BS_Undefined: 10325 Masked.push_back('N'); 10326 Masked.push_back('M'); 10327 break; 10328 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10329 Masked.push_back('N'); 10330 break; 10331 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10332 Masked.push_back('M'); 10333 break; 10334 } 10335 for (char Mask : Masked) { 10336 for (const ISADataTy &Data : ISAData) { 10337 SmallString<256> Buffer; 10338 llvm::raw_svector_ostream Out(Buffer); 10339 Out << "_ZGV" << Data.ISA << Mask; 10340 if (!VLENVal) { 10341 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10342 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10343 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10344 } else { 10345 Out << VLENVal; 10346 } 10347 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10348 switch (ParamAttr.Kind){ 10349 case LinearWithVarStride: 10350 Out << 's' << ParamAttr.StrideOrArg; 10351 break; 10352 case Linear: 10353 Out << 'l'; 10354 if (!!ParamAttr.StrideOrArg) 10355 Out << ParamAttr.StrideOrArg; 10356 break; 10357 case Uniform: 10358 Out << 'u'; 10359 break; 10360 case Vector: 10361 Out << 'v'; 10362 break; 10363 } 10364 if (!!ParamAttr.Alignment) 10365 Out << 'a' << ParamAttr.Alignment; 10366 } 10367 Out << '_' << Fn->getName(); 10368 Fn->addFnAttr(Out.str()); 10369 } 10370 } 10371 } 10372 10373 // This are the Functions that are needed to mangle the name of the 10374 // vector functions generated by the compiler, according to the rules 10375 // defined in the "Vector Function ABI specifications for AArch64", 10376 // available at 10377 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10378 10379 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10380 /// 10381 /// TODO: Need to implement the behavior for reference marked with a 10382 /// var or no linear modifiers (1.b in the section). For this, we 10383 /// need to extend ParamKindTy to support the linear modifiers. 10384 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10385 QT = QT.getCanonicalType(); 10386 10387 if (QT->isVoidType()) 10388 return false; 10389 10390 if (Kind == ParamKindTy::Uniform) 10391 return false; 10392 10393 if (Kind == ParamKindTy::Linear) 10394 return false; 10395 10396 // TODO: Handle linear references with modifiers 10397 10398 if (Kind == ParamKindTy::LinearWithVarStride) 10399 return false; 10400 10401 return true; 10402 } 10403 10404 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10405 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10406 QT = QT.getCanonicalType(); 10407 unsigned Size = C.getTypeSize(QT); 10408 10409 // Only scalars and complex within 16 bytes wide set PVB to true. 10410 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10411 return false; 10412 10413 if (QT->isFloatingType()) 10414 return true; 10415 10416 if (QT->isIntegerType()) 10417 return true; 10418 10419 if (QT->isPointerType()) 10420 return true; 10421 10422 // TODO: Add support for complex types (section 3.1.2, item 2). 10423 10424 return false; 10425 } 10426 10427 /// Computes the lane size (LS) of a return type or of an input parameter, 10428 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10429 /// TODO: Add support for references, section 3.2.1, item 1. 
10430 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10431 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10432 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10433 if (getAArch64PBV(PTy, C)) 10434 return C.getTypeSize(PTy); 10435 } 10436 if (getAArch64PBV(QT, C)) 10437 return C.getTypeSize(QT); 10438 10439 return C.getTypeSize(C.getUIntPtrType()); 10440 } 10441 10442 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10443 // signature of the scalar function, as defined in 3.2.2 of the 10444 // AAVFABI. 10445 static std::tuple<unsigned, unsigned, bool> 10446 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10447 QualType RetType = FD->getReturnType().getCanonicalType(); 10448 10449 ASTContext &C = FD->getASTContext(); 10450 10451 bool OutputBecomesInput = false; 10452 10453 llvm::SmallVector<unsigned, 8> Sizes; 10454 if (!RetType->isVoidType()) { 10455 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10456 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10457 OutputBecomesInput = true; 10458 } 10459 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10460 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10461 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10462 } 10463 10464 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10465 // The LS of a function parameter / return value can only be a power 10466 // of 2, starting from 8 bits, up to 128. 
10467 assert(std::all_of(Sizes.begin(), Sizes.end(), 10468 [](unsigned Size) { 10469 return Size == 8 || Size == 16 || Size == 32 || 10470 Size == 64 || Size == 128; 10471 }) && 10472 "Invalid size"); 10473 10474 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10475 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10476 OutputBecomesInput); 10477 } 10478 10479 /// Mangle the parameter part of the vector function name according to 10480 /// their OpenMP classification. The mangling function is defined in 10481 /// section 3.5 of the AAVFABI. 10482 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10483 SmallString<256> Buffer; 10484 llvm::raw_svector_ostream Out(Buffer); 10485 for (const auto &ParamAttr : ParamAttrs) { 10486 switch (ParamAttr.Kind) { 10487 case LinearWithVarStride: 10488 Out << "ls" << ParamAttr.StrideOrArg; 10489 break; 10490 case Linear: 10491 Out << 'l'; 10492 // Don't print the step value if it is not present or if it is 10493 // equal to 1. 10494 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10495 Out << ParamAttr.StrideOrArg; 10496 break; 10497 case Uniform: 10498 Out << 'u'; 10499 break; 10500 case Vector: 10501 Out << 'v'; 10502 break; 10503 } 10504 10505 if (!!ParamAttr.Alignment) 10506 Out << 'a' << ParamAttr.Alignment; 10507 } 10508 10509 return Out.str(); 10510 } 10511 10512 // Function used to add the attribute. The parameter `VLEN` is 10513 // templated to allow the use of "x" when targeting scalable functions 10514 // for SVE. 
10515 template <typename T> 10516 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10517 char ISA, StringRef ParSeq, 10518 StringRef MangledName, bool OutputBecomesInput, 10519 llvm::Function *Fn) { 10520 SmallString<256> Buffer; 10521 llvm::raw_svector_ostream Out(Buffer); 10522 Out << Prefix << ISA << LMask << VLEN; 10523 if (OutputBecomesInput) 10524 Out << "v"; 10525 Out << ParSeq << "_" << MangledName; 10526 Fn->addFnAttr(Out.str()); 10527 } 10528 10529 // Helper function to generate the Advanced SIMD names depending on 10530 // the value of the NDS when simdlen is not present. 10531 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10532 StringRef Prefix, char ISA, 10533 StringRef ParSeq, StringRef MangledName, 10534 bool OutputBecomesInput, 10535 llvm::Function *Fn) { 10536 switch (NDS) { 10537 case 8: 10538 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10539 OutputBecomesInput, Fn); 10540 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10541 OutputBecomesInput, Fn); 10542 break; 10543 case 16: 10544 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10545 OutputBecomesInput, Fn); 10546 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10547 OutputBecomesInput, Fn); 10548 break; 10549 case 32: 10550 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10551 OutputBecomesInput, Fn); 10552 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10553 OutputBecomesInput, Fn); 10554 break; 10555 case 64: 10556 case 128: 10557 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10558 OutputBecomesInput, Fn); 10559 break; 10560 default: 10561 llvm_unreachable("Scalar type is too wide."); 10562 } 10563 } 10564 10565 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10566 static void emitAArch64DeclareSimdFunction( 10567 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10568 ArrayRef<ParamAttrTy> ParamAttrs, 10569 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10570 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10571 10572 // Get basic data for building the vector signature. 10573 const auto Data = getNDSWDS(FD, ParamAttrs); 10574 const unsigned NDS = std::get<0>(Data); 10575 const unsigned WDS = std::get<1>(Data); 10576 const bool OutputBecomesInput = std::get<2>(Data); 10577 10578 // Check the values provided via `simdlen` by the user. 10579 // 1. A `simdlen(1)` doesn't produce vector signatures, 10580 if (UserVLEN == 1) { 10581 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10582 DiagnosticsEngine::Warning, 10583 "The clause simdlen(1) has no effect when targeting aarch64."); 10584 CGM.getDiags().Report(SLoc, DiagID); 10585 return; 10586 } 10587 10588 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10589 // Advanced SIMD output. 10590 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10591 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10592 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10593 "power of 2 when targeting Advanced SIMD."); 10594 CGM.getDiags().Report(SLoc, DiagID); 10595 return; 10596 } 10597 10598 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10599 // limits. 10600 if (ISA == 's' && UserVLEN != 0) { 10601 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10602 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10603 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10604 "lanes in the architectural constraints " 10605 "for SVE (min is 128-bit, max is " 10606 "2048-bit, by steps of 128-bit)"); 10607 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10608 return; 10609 } 10610 } 10611 10612 // Sort out parameter sequence. 
10613 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10614 StringRef Prefix = "_ZGV"; 10615 // Generate simdlen from user input (if any). 10616 if (UserVLEN) { 10617 if (ISA == 's') { 10618 // SVE generates only a masked function. 10619 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10620 OutputBecomesInput, Fn); 10621 } else { 10622 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10623 // Advanced SIMD generates one or two functions, depending on 10624 // the `[not]inbranch` clause. 10625 switch (State) { 10626 case OMPDeclareSimdDeclAttr::BS_Undefined: 10627 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10628 OutputBecomesInput, Fn); 10629 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10630 OutputBecomesInput, Fn); 10631 break; 10632 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10633 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10634 OutputBecomesInput, Fn); 10635 break; 10636 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10637 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10638 OutputBecomesInput, Fn); 10639 break; 10640 } 10641 } 10642 } else { 10643 // If no user simdlen is provided, follow the AAVFABI rules for 10644 // generating the vector length. 10645 if (ISA == 's') { 10646 // SVE, section 3.4.1, item 1. 10647 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10648 OutputBecomesInput, Fn); 10649 } else { 10650 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10651 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10652 // two vector names depending on the use of the clause 10653 // `[not]inbranch`. 
10654 switch (State) { 10655 case OMPDeclareSimdDeclAttr::BS_Undefined: 10656 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10657 OutputBecomesInput, Fn); 10658 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10659 OutputBecomesInput, Fn); 10660 break; 10661 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10662 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10663 OutputBecomesInput, Fn); 10664 break; 10665 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10666 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10667 OutputBecomesInput, Fn); 10668 break; 10669 } 10670 } 10671 } 10672 } 10673 10674 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10675 llvm::Function *Fn) { 10676 ASTContext &C = CGM.getContext(); 10677 FD = FD->getMostRecentDecl(); 10678 // Map params to their positions in function decl. 10679 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10680 if (isa<CXXMethodDecl>(FD)) 10681 ParamPositions.try_emplace(FD, 0); 10682 unsigned ParamPos = ParamPositions.size(); 10683 for (const ParmVarDecl *P : FD->parameters()) { 10684 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10685 ++ParamPos; 10686 } 10687 while (FD) { 10688 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10689 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10690 // Mark uniform parameters. 10691 for (const Expr *E : Attr->uniforms()) { 10692 E = E->IgnoreParenImpCasts(); 10693 unsigned Pos; 10694 if (isa<CXXThisExpr>(E)) { 10695 Pos = ParamPositions[FD]; 10696 } else { 10697 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10698 ->getCanonicalDecl(); 10699 Pos = ParamPositions[PVD]; 10700 } 10701 ParamAttrs[Pos].Kind = Uniform; 10702 } 10703 // Get alignment info. 
10704 auto NI = Attr->alignments_begin(); 10705 for (const Expr *E : Attr->aligneds()) { 10706 E = E->IgnoreParenImpCasts(); 10707 unsigned Pos; 10708 QualType ParmTy; 10709 if (isa<CXXThisExpr>(E)) { 10710 Pos = ParamPositions[FD]; 10711 ParmTy = E->getType(); 10712 } else { 10713 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10714 ->getCanonicalDecl(); 10715 Pos = ParamPositions[PVD]; 10716 ParmTy = PVD->getType(); 10717 } 10718 ParamAttrs[Pos].Alignment = 10719 (*NI) 10720 ? (*NI)->EvaluateKnownConstInt(C) 10721 : llvm::APSInt::getUnsigned( 10722 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10723 .getQuantity()); 10724 ++NI; 10725 } 10726 // Mark linear parameters. 10727 auto SI = Attr->steps_begin(); 10728 auto MI = Attr->modifiers_begin(); 10729 for (const Expr *E : Attr->linears()) { 10730 E = E->IgnoreParenImpCasts(); 10731 unsigned Pos; 10732 if (isa<CXXThisExpr>(E)) { 10733 Pos = ParamPositions[FD]; 10734 } else { 10735 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10736 ->getCanonicalDecl(); 10737 Pos = ParamPositions[PVD]; 10738 } 10739 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10740 ParamAttr.Kind = Linear; 10741 if (*SI) { 10742 Expr::EvalResult Result; 10743 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10744 if (const auto *DRE = 10745 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10746 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10747 ParamAttr.Kind = LinearWithVarStride; 10748 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10749 ParamPositions[StridePVD->getCanonicalDecl()]); 10750 } 10751 } 10752 } else { 10753 ParamAttr.StrideOrArg = Result.Val.getInt(); 10754 } 10755 } 10756 ++SI; 10757 ++MI; 10758 } 10759 llvm::APSInt VLENVal; 10760 SourceLocation ExprLoc; 10761 const Expr *VLENExpr = Attr->getSimdlen(); 10762 if (VLENExpr) { 10763 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10764 ExprLoc = VLENExpr->getExprLoc(); 10765 } 10766 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10767 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10768 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10769 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10770 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10771 unsigned VLEN = VLENVal.getExtValue(); 10772 StringRef MangledName = Fn->getName(); 10773 if (CGM.getTarget().hasFeature("sve")) 10774 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10775 MangledName, 's', 128, Fn, ExprLoc); 10776 if (CGM.getTarget().hasFeature("neon")) 10777 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10778 MangledName, 'n', 128, Fn, ExprLoc); 10779 } 10780 } 10781 FD = FD->getPreviousDecl(); 10782 } 10783 } 10784 10785 namespace { 10786 /// Cleanup action for doacross support. 10787 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10788 public: 10789 static const int DoacrossFinArgs = 2; 10790 10791 private: 10792 llvm::FunctionCallee RTLFn; 10793 llvm::Value *Args[DoacrossFinArgs]; 10794 10795 public: 10796 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10797 ArrayRef<llvm::Value *> CallArgs) 10798 : RTLFn(RTLFn) { 10799 assert(CallArgs.size() == DoacrossFinArgs); 10800 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10801 } 10802 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10803 if (!CGF.HaveInsertPoint()) 10804 return; 10805 CGF.EmitRuntimeCall(RTLFn, Args); 10806 } 10807 }; 10808 } // namespace 10809 10810 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10811 const OMPLoopDirective &D, 10812 ArrayRef<Expr *> NumIterations) { 10813 if (!CGF.HaveInsertPoint()) 10814 return; 10815 10816 ASTContext &C = CGM.getContext(); 10817 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10818 RecordDecl *RD; 10819 if (KmpDimTy.isNull()) { 10820 // Build struct kmp_dim { // loop bounds info casted to 
kmp_int64 10821 // kmp_int64 lo; // lower 10822 // kmp_int64 up; // upper 10823 // kmp_int64 st; // stride 10824 // }; 10825 RD = C.buildImplicitRecord("kmp_dim"); 10826 RD->startDefinition(); 10827 addFieldToRecordDecl(C, RD, Int64Ty); 10828 addFieldToRecordDecl(C, RD, Int64Ty); 10829 addFieldToRecordDecl(C, RD, Int64Ty); 10830 RD->completeDefinition(); 10831 KmpDimTy = C.getRecordType(RD); 10832 } else { 10833 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10834 } 10835 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10836 QualType ArrayTy = 10837 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 10838 10839 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10840 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10841 enum { LowerFD = 0, UpperFD, StrideFD }; 10842 // Fill dims with data. 10843 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10844 LValue DimsLVal = CGF.MakeAddrLValue( 10845 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10846 // dims.upper = num_iterations; 10847 LValue UpperLVal = CGF.EmitLValueForField( 10848 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10849 llvm::Value *NumIterVal = 10850 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10851 D.getNumIterations()->getType(), Int64Ty, 10852 D.getNumIterations()->getExprLoc()); 10853 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10854 // dims.stride = 1; 10855 LValue StrideLVal = CGF.EmitLValueForField( 10856 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10857 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10858 StrideLVal); 10859 } 10860 10861 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10862 // kmp_int32 num_dims, struct kmp_dim * dims); 10863 llvm::Value *Args[] = { 10864 emitUpdateLocation(CGF, D.getBeginLoc()), 10865 getThreadID(CGF, D.getBeginLoc()), 10866 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10867 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10868 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10869 CGM.VoidPtrTy)}; 10870 10871 llvm::FunctionCallee RTLFn = 10872 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10873 CGF.EmitRuntimeCall(RTLFn, Args); 10874 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10875 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10876 llvm::FunctionCallee FiniRTLFn = 10877 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10878 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10879 llvm::makeArrayRef(FiniArgs)); 10880 } 10881 10882 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10883 const OMPDependClause *C) { 10884 QualType Int64Ty = 10885 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10886 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10887 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10888 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 10889 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10890 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10891 const Expr *CounterVal = C->getLoopData(I); 10892 assert(CounterVal); 10893 llvm::Value *CntVal = CGF.EmitScalarConversion( 10894 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10895 CounterVal->getExprLoc()); 10896 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10897 /*Volatile=*/false, Int64Ty); 10898 } 10899 llvm::Value *Args[] = { 10900 emitUpdateLocation(CGF, C->getBeginLoc()), 10901 getThreadID(CGF, C->getBeginLoc()), 10902 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10903 llvm::FunctionCallee RTLFn; 10904 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10905 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10906 } else { 10907 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10908 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 
10909 } 10910 CGF.EmitRuntimeCall(RTLFn, Args); 10911 } 10912 10913 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10914 llvm::FunctionCallee Callee, 10915 ArrayRef<llvm::Value *> Args) const { 10916 assert(Loc.isValid() && "Outlined function call location must be valid."); 10917 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10918 10919 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10920 if (Fn->doesNotThrow()) { 10921 CGF.EmitNounwindRuntimeCall(Fn, Args); 10922 return; 10923 } 10924 } 10925 CGF.EmitRuntimeCall(Callee, Args); 10926 } 10927 10928 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10929 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10930 ArrayRef<llvm::Value *> Args) const { 10931 emitCall(CGF, Loc, OutlinedFn, Args); 10932 } 10933 10934 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 10935 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 10936 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 10937 HasEmittedDeclareTargetRegion = true; 10938 } 10939 10940 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10941 const VarDecl *NativeParam, 10942 const VarDecl *TargetParam) const { 10943 return CGF.GetAddrOfLocalVar(NativeParam); 10944 } 10945 10946 namespace { 10947 /// Cleanup action for allocate support. 
10948 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10949 public: 10950 static const int CleanupArgs = 3; 10951 10952 private: 10953 llvm::FunctionCallee RTLFn; 10954 llvm::Value *Args[CleanupArgs]; 10955 10956 public: 10957 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 10958 ArrayRef<llvm::Value *> CallArgs) 10959 : RTLFn(RTLFn) { 10960 assert(CallArgs.size() == CleanupArgs && 10961 "Size of arguments does not match."); 10962 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10963 } 10964 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10965 if (!CGF.HaveInsertPoint()) 10966 return; 10967 CGF.EmitRuntimeCall(RTLFn, Args); 10968 } 10969 }; 10970 } // namespace 10971 10972 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 10973 const VarDecl *VD) { 10974 if (!VD) 10975 return Address::invalid(); 10976 const VarDecl *CVD = VD->getCanonicalDecl(); 10977 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 10978 return Address::invalid(); 10979 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 10980 // Use the default allocation. 
10981 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 10982 !AA->getAllocator()) 10983 return Address::invalid(); 10984 llvm::Value *Size; 10985 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 10986 if (CVD->getType()->isVariablyModifiedType()) { 10987 Size = CGF.getTypeSize(CVD->getType()); 10988 // Align the size: ((size + align - 1) / align) * align 10989 Size = CGF.Builder.CreateNUWAdd( 10990 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 10991 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 10992 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 10993 } else { 10994 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 10995 Size = CGM.getSize(Sz.alignTo(Align)); 10996 } 10997 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 10998 assert(AA->getAllocator() && 10999 "Expected allocator expression for non-default allocator."); 11000 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11001 // According to the standard, the original allocator type is a enum (integer). 11002 // Convert to pointer type, if required. 
11003 if (Allocator->getType()->isIntegerTy()) 11004 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11005 else if (Allocator->getType()->isPointerTy()) 11006 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11007 CGM.VoidPtrTy); 11008 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11009 11010 llvm::Value *Addr = 11011 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11012 CVD->getName() + ".void.addr"); 11013 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11014 Allocator}; 11015 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11016 11017 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11018 llvm::makeArrayRef(FiniArgs)); 11019 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11020 Addr, 11021 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11022 CVD->getName() + ".addr"); 11023 return Address(Addr, Align); 11024 } 11025 11026 /// Checks current context and returns true if it matches the context selector. 11027 template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet, 11028 OMPDeclareVariantAttr::CtxSelectorType Ctx> 11029 static bool checkContext(const OMPDeclareVariantAttr *A) { 11030 assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown && 11031 Ctx != OMPDeclareVariantAttr::CtxUnknown && 11032 "Unknown context selector or context selector set."); 11033 return false; 11034 } 11035 11036 /// Checks for implementation={vendor(<vendor>)} context selector. 11037 /// \returns true iff <vendor>="llvm", false otherwise. 
11038 template <> 11039 bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation, 11040 OMPDeclareVariantAttr::CtxVendor>( 11041 const OMPDeclareVariantAttr *A) { 11042 return llvm::all_of(A->implVendors(), 11043 [](StringRef S) { return !S.compare_lower("llvm"); }); 11044 } 11045 11046 static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) { 11047 // If both scores are unknown, choose the very first one. 11048 if (!LHS && !RHS) 11049 return true; 11050 // If only one is known, return this one. 11051 if (LHS && !RHS) 11052 return true; 11053 if (!LHS && RHS) 11054 return false; 11055 llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx); 11056 llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx); 11057 return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0; 11058 } 11059 11060 namespace { 11061 /// Comparator for the priority queue for context selector. 11062 class OMPDeclareVariantAttrComparer 11063 : public std::greater<const OMPDeclareVariantAttr *> { 11064 private: 11065 ASTContext &Ctx; 11066 11067 public: 11068 OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {} 11069 bool operator()(const OMPDeclareVariantAttr *LHS, 11070 const OMPDeclareVariantAttr *RHS) const { 11071 const Expr *LHSExpr = nullptr; 11072 const Expr *RHSExpr = nullptr; 11073 if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11074 LHSExpr = LHS->getScore(); 11075 if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11076 RHSExpr = RHS->getScore(); 11077 return greaterCtxScore(Ctx, LHSExpr, RHSExpr); 11078 } 11079 }; 11080 } // anonymous namespace 11081 11082 /// Finds the variant function that matches current context with its context 11083 /// selector. 11084 static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx, 11085 const FunctionDecl *FD) { 11086 if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) 11087 return FD; 11088 // Iterate through all DeclareVariant attributes and check context selectors. 
11089 auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS, 11090 const OMPDeclareVariantAttr *RHS) { 11091 const Expr *LHSExpr = nullptr; 11092 const Expr *RHSExpr = nullptr; 11093 if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11094 LHSExpr = LHS->getScore(); 11095 if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11096 RHSExpr = RHS->getScore(); 11097 return greaterCtxScore(Ctx, LHSExpr, RHSExpr); 11098 }; 11099 const OMPDeclareVariantAttr *TopMostAttr = nullptr; 11100 for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { 11101 const OMPDeclareVariantAttr *SelectedAttr = nullptr; 11102 switch (A->getCtxSelectorSet()) { 11103 case OMPDeclareVariantAttr::CtxSetImplementation: 11104 switch (A->getCtxSelector()) { 11105 case OMPDeclareVariantAttr::CtxVendor: 11106 if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation, 11107 OMPDeclareVariantAttr::CtxVendor>(A)) 11108 SelectedAttr = A; 11109 break; 11110 case OMPDeclareVariantAttr::CtxUnknown: 11111 llvm_unreachable( 11112 "Unknown context selector in implementation selector set."); 11113 } 11114 break; 11115 case OMPDeclareVariantAttr::CtxSetUnknown: 11116 llvm_unreachable("Unknown context selector set."); 11117 } 11118 // If the attribute matches the context, find the attribute with the highest 11119 // score. 11120 if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr))) 11121 TopMostAttr = SelectedAttr; 11122 } 11123 if (!TopMostAttr) 11124 return FD; 11125 return cast<FunctionDecl>( 11126 cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts()) 11127 ->getDecl()); 11128 } 11129 11130 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { 11131 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11132 // If the original function is defined already, use its definition. 
11133 StringRef MangledName = CGM.getMangledName(GD); 11134 llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); 11135 if (Orig && !Orig->isDeclaration()) 11136 return false; 11137 const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D); 11138 // Emit original function if it does not have declare variant attribute or the 11139 // context does not match. 11140 if (NewFD == D) 11141 return false; 11142 GlobalDecl NewGD = GD.getWithDecl(NewFD); 11143 if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { 11144 DeferredVariantFunction.erase(D); 11145 return true; 11146 } 11147 DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); 11148 return true; 11149 } 11150 11151 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11152 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11153 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11154 llvm_unreachable("Not supported in SIMD-only mode"); 11155 } 11156 11157 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11158 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11159 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11160 llvm_unreachable("Not supported in SIMD-only mode"); 11161 } 11162 11163 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11164 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11165 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11166 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11167 bool Tied, unsigned &NumberOfParts) { 11168 llvm_unreachable("Not supported in SIMD-only mode"); 11169 } 11170 11171 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11172 SourceLocation Loc, 11173 llvm::Function *OutlinedFn, 11174 ArrayRef<llvm::Value *> CapturedVars, 11175 const Expr *IfCond) { 11176 llvm_unreachable("Not supported in SIMD-only mode"); 11177 } 11178 11179 void 
CGOpenMPSIMDRuntime::emitCriticalRegion( 11180 CodeGenFunction &CGF, StringRef CriticalName, 11181 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11182 const Expr *Hint) { 11183 llvm_unreachable("Not supported in SIMD-only mode"); 11184 } 11185 11186 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11187 const RegionCodeGenTy &MasterOpGen, 11188 SourceLocation Loc) { 11189 llvm_unreachable("Not supported in SIMD-only mode"); 11190 } 11191 11192 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11193 SourceLocation Loc) { 11194 llvm_unreachable("Not supported in SIMD-only mode"); 11195 } 11196 11197 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11198 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11199 SourceLocation Loc) { 11200 llvm_unreachable("Not supported in SIMD-only mode"); 11201 } 11202 11203 void CGOpenMPSIMDRuntime::emitSingleRegion( 11204 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11205 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11206 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11207 ArrayRef<const Expr *> AssignmentOps) { 11208 llvm_unreachable("Not supported in SIMD-only mode"); 11209 } 11210 11211 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11212 const RegionCodeGenTy &OrderedOpGen, 11213 SourceLocation Loc, 11214 bool IsThreads) { 11215 llvm_unreachable("Not supported in SIMD-only mode"); 11216 } 11217 11218 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11219 SourceLocation Loc, 11220 OpenMPDirectiveKind Kind, 11221 bool EmitChecks, 11222 bool ForceSimpleCall) { 11223 llvm_unreachable("Not supported in SIMD-only mode"); 11224 } 11225 11226 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11227 CodeGenFunction &CGF, SourceLocation Loc, 11228 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11229 bool Ordered, const DispatchRTInput &DispatchValues) { 11230 llvm_unreachable("Not 
supported in SIMD-only mode"); 11231 } 11232 11233 void CGOpenMPSIMDRuntime::emitForStaticInit( 11234 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11235 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11236 llvm_unreachable("Not supported in SIMD-only mode"); 11237 } 11238 11239 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11240 CodeGenFunction &CGF, SourceLocation Loc, 11241 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 11242 llvm_unreachable("Not supported in SIMD-only mode"); 11243 } 11244 11245 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11246 SourceLocation Loc, 11247 unsigned IVSize, 11248 bool IVSigned) { 11249 llvm_unreachable("Not supported in SIMD-only mode"); 11250 } 11251 11252 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11253 SourceLocation Loc, 11254 OpenMPDirectiveKind DKind) { 11255 llvm_unreachable("Not supported in SIMD-only mode"); 11256 } 11257 11258 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11259 SourceLocation Loc, 11260 unsigned IVSize, bool IVSigned, 11261 Address IL, Address LB, 11262 Address UB, Address ST) { 11263 llvm_unreachable("Not supported in SIMD-only mode"); 11264 } 11265 11266 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11267 llvm::Value *NumThreads, 11268 SourceLocation Loc) { 11269 llvm_unreachable("Not supported in SIMD-only mode"); 11270 } 11271 11272 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11273 OpenMPProcBindClauseKind ProcBind, 11274 SourceLocation Loc) { 11275 llvm_unreachable("Not supported in SIMD-only mode"); 11276 } 11277 11278 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11279 const VarDecl *VD, 11280 Address VDAddr, 11281 SourceLocation Loc) { 11282 llvm_unreachable("Not supported in SIMD-only mode"); 11283 } 11284 11285 llvm::Function 
*CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11286 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11287 CodeGenFunction *CGF) { 11288 llvm_unreachable("Not supported in SIMD-only mode"); 11289 } 11290 11291 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11292 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11293 llvm_unreachable("Not supported in SIMD-only mode"); 11294 } 11295 11296 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11297 ArrayRef<const Expr *> Vars, 11298 SourceLocation Loc) { 11299 llvm_unreachable("Not supported in SIMD-only mode"); 11300 } 11301 11302 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11303 const OMPExecutableDirective &D, 11304 llvm::Function *TaskFunction, 11305 QualType SharedsTy, Address Shareds, 11306 const Expr *IfCond, 11307 const OMPTaskDataTy &Data) { 11308 llvm_unreachable("Not supported in SIMD-only mode"); 11309 } 11310 11311 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11312 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11313 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11314 const Expr *IfCond, const OMPTaskDataTy &Data) { 11315 llvm_unreachable("Not supported in SIMD-only mode"); 11316 } 11317 11318 void CGOpenMPSIMDRuntime::emitReduction( 11319 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11320 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11321 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11322 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11323 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11324 ReductionOps, Options); 11325 } 11326 11327 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11328 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11329 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11330 
llvm_unreachable("Not supported in SIMD-only mode"); 11331 } 11332 11333 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11334 SourceLocation Loc, 11335 ReductionCodeGen &RCG, 11336 unsigned N) { 11337 llvm_unreachable("Not supported in SIMD-only mode"); 11338 } 11339 11340 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11341 SourceLocation Loc, 11342 llvm::Value *ReductionsPtr, 11343 LValue SharedLVal) { 11344 llvm_unreachable("Not supported in SIMD-only mode"); 11345 } 11346 11347 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11348 SourceLocation Loc) { 11349 llvm_unreachable("Not supported in SIMD-only mode"); 11350 } 11351 11352 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11353 CodeGenFunction &CGF, SourceLocation Loc, 11354 OpenMPDirectiveKind CancelRegion) { 11355 llvm_unreachable("Not supported in SIMD-only mode"); 11356 } 11357 11358 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11359 SourceLocation Loc, const Expr *IfCond, 11360 OpenMPDirectiveKind CancelRegion) { 11361 llvm_unreachable("Not supported in SIMD-only mode"); 11362 } 11363 11364 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11365 const OMPExecutableDirective &D, StringRef ParentName, 11366 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11367 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11368 llvm_unreachable("Not supported in SIMD-only mode"); 11369 } 11370 11371 void CGOpenMPSIMDRuntime::emitTargetCall( 11372 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11373 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 11374 const Expr *Device, 11375 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 11376 const OMPLoopDirective &D)> 11377 SizeEmitter) { 11378 llvm_unreachable("Not supported in SIMD-only mode"); 11379 } 11380 11381 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 11382 llvm_unreachable("Not supported in 
SIMD-only mode"); 11383 } 11384 11385 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 11386 llvm_unreachable("Not supported in SIMD-only mode"); 11387 } 11388 11389 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 11390 return false; 11391 } 11392 11393 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 11394 const OMPExecutableDirective &D, 11395 SourceLocation Loc, 11396 llvm::Function *OutlinedFn, 11397 ArrayRef<llvm::Value *> CapturedVars) { 11398 llvm_unreachable("Not supported in SIMD-only mode"); 11399 } 11400 11401 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11402 const Expr *NumTeams, 11403 const Expr *ThreadLimit, 11404 SourceLocation Loc) { 11405 llvm_unreachable("Not supported in SIMD-only mode"); 11406 } 11407 11408 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 11409 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11410 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11411 llvm_unreachable("Not supported in SIMD-only mode"); 11412 } 11413 11414 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 11415 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11416 const Expr *Device) { 11417 llvm_unreachable("Not supported in SIMD-only mode"); 11418 } 11419 11420 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11421 const OMPLoopDirective &D, 11422 ArrayRef<Expr *> NumIterations) { 11423 llvm_unreachable("Not supported in SIMD-only mode"); 11424 } 11425 11426 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11427 const OMPDependClause *C) { 11428 llvm_unreachable("Not supported in SIMD-only mode"); 11429 } 11430 11431 const VarDecl * 11432 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 11433 const VarDecl *NativeParam) const { 11434 llvm_unreachable("Not supported in SIMD-only mode"); 11435 } 11436 11437 Address 11438 
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 11439 const VarDecl *NativeParam, 11440 const VarDecl *TargetParam) const { 11441 llvm_unreachable("Not supported in SIMD-only mode"); 11442 } 11443