1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGCXXABI.h" 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/CodeGen/ConstantInitBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/Basic/BitmaskEnum.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/Bitcode/BitcodeReader.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/Value.h" 27 #include "llvm/Support/Format.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 31 using namespace clang; 32 using namespace CodeGen; 33 34 namespace { 35 /// Base class for handling code generation inside OpenMP regions. 36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 37 public: 38 /// Kinds of OpenMP regions used in codegen. 39 enum CGOpenMPRegionKind { 40 /// Region with outlined function for standalone 'parallel' 41 /// directive. 42 ParallelOutlinedRegion, 43 /// Region with outlined function for standalone 'task' directive. 44 TaskOutlinedRegion, 45 /// Region for constructs that do not require function outlining, 46 /// like 'for', 'sections', 'atomic' etc. directives. 47 InlinedRegion, 48 /// Region with outlined function for standalone 'target' directive. 
49 TargetRegion, 50 }; 51 52 CGOpenMPRegionInfo(const CapturedStmt &CS, 53 const CGOpenMPRegionKind RegionKind, 54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 55 bool HasCancel) 56 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 57 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 58 59 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 63 Kind(Kind), HasCancel(HasCancel) {} 64 65 /// Get a variable or parameter for storing global thread id 66 /// inside OpenMP construct. 67 virtual const VarDecl *getThreadIDVariable() const = 0; 68 69 /// Emit the captured statement body. 70 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 71 72 /// Get an LValue for the current ThreadID variable. 73 /// \return LValue for thread id variable. This LValue always has type int32*. 74 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 75 76 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 77 78 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 79 80 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 81 82 bool hasCancel() const { return HasCancel; } 83 84 static bool classof(const CGCapturedStmtInfo *Info) { 85 return Info->getKind() == CR_OpenMP; 86 } 87 88 ~CGOpenMPRegionInfo() override = default; 89 90 protected: 91 CGOpenMPRegionKind RegionKind; 92 RegionCodeGenTy CodeGen; 93 OpenMPDirectiveKind Kind; 94 bool HasCancel; 95 }; 96 97 /// API for captured statement code generation in OpenMP constructs. 
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 
146 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 llvm::Value *Res = 150 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 151 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 153 CGF.EmitBlock(DoneBB); 154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 156 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 157 CGF.Builder.GetInsertBlock()); 158 emitUntiedSwitch(CGF); 159 } 160 } 161 void emitUntiedSwitch(CodeGenFunction &CGF) const { 162 if (Untied) { 163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 167 PartIdLVal); 168 UntiedCodeGen(CGF); 169 CodeGenFunction::JumpDest CurPoint = 170 CGF.getJumpDestInCurrentScope(".untied.next."); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 CGF.Builder.GetInsertBlock()); 175 CGF.EmitBranchThroughCleanup(CurPoint); 176 CGF.EmitBlock(CurPoint.getBlock()); 177 } 178 } 179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 180 }; 181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 182 const VarDecl *ThreadIDVar, 183 const RegionCodeGenTy &CodeGen, 184 OpenMPDirectiveKind Kind, bool HasCancel, 185 const UntiedTaskActionTy &Action) 186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 187 ThreadIDVar(ThreadIDVar), Action(Action) { 188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 189 } 190 191 /// Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 
193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 194 195 /// Get an LValue for the current ThreadID variable. 196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 197 198 /// Get the name of the capture helper. 199 StringRef getHelperName() const override { return ".omp_outlined."; } 200 201 void emitUntiedSwitch(CodeGenFunction &CGF) override { 202 Action.emitUntiedSwitch(CGF); 203 } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 208 TaskOutlinedRegion; 209 } 210 211 private: 212 /// A variable or parameter storing global thread id for OpenMP 213 /// constructs. 214 const VarDecl *ThreadIDVar; 215 /// Action for emitting code for untied tasks. 216 const UntiedTaskActionTy &Action; 217 }; 218 219 /// API for inlined captured statement code generation in OpenMP 220 /// constructs. 221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 222 public: 223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 224 const RegionCodeGenTy &CodeGen, 225 OpenMPDirectiveKind Kind, bool HasCancel) 226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 227 OldCSI(OldCSI), 228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 229 230 // Retrieve the value of the context parameter. 231 llvm::Value *getContextValue() const override { 232 if (OuterRegionInfo) 233 return OuterRegionInfo->getContextValue(); 234 llvm_unreachable("No context value for inlined OpenMP region"); 235 } 236 237 void setContextValue(llvm::Value *V) override { 238 if (OuterRegionInfo) { 239 OuterRegionInfo->setContextValue(V); 240 return; 241 } 242 llvm_unreachable("No context value for inlined OpenMP region"); 243 } 244 245 /// Lookup the captured field decl for a variable. 
246 const FieldDecl *lookup(const VarDecl *VD) const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->lookup(VD); 249 // If there is no outer outlined region,no need to lookup in a list of 250 // captured variables, we can use the original one. 251 return nullptr; 252 } 253 254 FieldDecl *getThisFieldDecl() const override { 255 if (OuterRegionInfo) 256 return OuterRegionInfo->getThisFieldDecl(); 257 return nullptr; 258 } 259 260 /// Get a variable or parameter for storing global thread id 261 /// inside OpenMP construct. 262 const VarDecl *getThreadIDVariable() const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->getThreadIDVariable(); 265 return nullptr; 266 } 267 268 /// Get an LValue for the current ThreadID variable. 269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 272 llvm_unreachable("No LValue for inlined OpenMP construct"); 273 } 274 275 /// Get the name of the capture helper. 276 StringRef getHelperName() const override { 277 if (auto *OuterRegionInfo = getOldCSI()) 278 return OuterRegionInfo->getHelperName(); 279 llvm_unreachable("No helper name for inlined OpenMP construct"); 280 } 281 282 void emitUntiedSwitch(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 OuterRegionInfo->emitUntiedSwitch(CGF); 285 } 286 287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 288 289 static bool classof(const CGCapturedStmtInfo *Info) { 290 return CGOpenMPRegionInfo::classof(Info) && 291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 292 } 293 294 ~CGOpenMPInlinedRegionInfo() override = default; 295 296 private: 297 /// CodeGen info about outer OpenMP region. 298 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 299 CGOpenMPRegionInfo *OuterRegionInfo; 300 }; 301 302 /// API for captured statement code generation in OpenMP target 303 /// constructs. 
For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 
346 for (const auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate( 359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 360 } 361 (void)PrivScope.Privatize(); 362 } 363 364 /// Lookup the captured field decl for a variable. 365 const FieldDecl *lookup(const VarDecl *VD) const override { 366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 367 return FD; 368 return nullptr; 369 } 370 371 /// Emit the captured statement body. 372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 373 llvm_unreachable("No body for expressions"); 374 } 375 376 /// Get a variable or parameter for storing global thread id 377 /// inside OpenMP construct. 378 const VarDecl *getThreadIDVariable() const override { 379 llvm_unreachable("No thread id for expressions"); 380 } 381 382 /// Get the name of the capture helper. 383 StringRef getHelperName() const override { 384 llvm_unreachable("No helper name for expressions"); 385 } 386 387 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 388 389 private: 390 /// Private scope to capture global variables. 391 CodeGenFunction::OMPPrivateScope PrivScope; 392 }; 393 394 /// RAII for emitting code of OpenMP constructs. 395 class InlinedOpenMPRegionRAII { 396 CodeGenFunction &CGF; 397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 398 FieldDecl *LambdaThisCaptureField = nullptr; 399 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 400 401 public: 402 /// Constructs region for combined constructs. 403 /// \param CodeGen Code generation sequence for combined directives. 
Includes 404 /// a list of functions used for code generation of implicitly inlined 405 /// regions. 406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 407 OpenMPDirectiveKind Kind, bool HasCancel) 408 : CGF(CGF) { 409 // Start emission for the construct. 410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 414 CGF.LambdaThisCaptureField = nullptr; 415 BlockInfo = CGF.BlockInfo; 416 CGF.BlockInfo = nullptr; 417 } 418 419 ~InlinedOpenMPRegionRAII() { 420 // Restore original CapturedStmtInfo only if we're done with code emission. 421 auto *OldCSI = 422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 423 delete CGF.CapturedStmtInfo; 424 CGF.CapturedStmtInfo = OldCSI; 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 427 CGF.BlockInfo = BlockInfo; 428 } 429 }; 430 431 /// Values for bit flags used in the ident_t to describe the fields. 432 /// All enumeric elements are named and described in accordance with the code 433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 434 enum OpenMPLocationFlags : unsigned { 435 /// Use trampoline for internal microtask. 436 OMP_IDENT_IMD = 0x01, 437 /// Use c-style ident structure. 438 OMP_IDENT_KMPC = 0x02, 439 /// Atomic reduction option for kmpc_reduce. 440 OMP_ATOMIC_REDUCE = 0x10, 441 /// Explicit 'barrier' directive. 442 OMP_IDENT_BARRIER_EXPL = 0x20, 443 /// Implicit barrier in code. 444 OMP_IDENT_BARRIER_IMPL = 0x40, 445 /// Implicit barrier in 'for' directive. 446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 447 /// Implicit barrier in 'sections' directive. 448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 449 /// Implicit barrier in 'single' directive. 
450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 451 /// Call of __kmp_for_static_init for static loop. 452 OMP_IDENT_WORK_LOOP = 0x200, 453 /// Call of __kmp_for_static_init for sections. 454 OMP_IDENT_WORK_SECTIONS = 0x400, 455 /// Call of __kmp_for_static_init for distribute. 456 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 458 }; 459 460 namespace { 461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 462 /// Values for bit flags for marking which requires clauses have been used. 463 enum OpenMPOffloadingRequiresDirFlags : int64_t { 464 /// flag undefined. 465 OMP_REQ_UNDEFINED = 0x000, 466 /// no requires clause present. 467 OMP_REQ_NONE = 0x001, 468 /// reverse_offload clause. 469 OMP_REQ_REVERSE_OFFLOAD = 0x002, 470 /// unified_address clause. 471 OMP_REQ_UNIFIED_ADDRESS = 0x004, 472 /// unified_shared_memory clause. 473 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 474 /// dynamic_allocators clause. 475 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 476 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 477 }; 478 479 enum OpenMPOffloadingReservedDeviceIDs { 480 /// Device ID if the device was not defined, runtime should get it 481 /// from environment variables in the spec. 482 OMP_DEVICEID_UNDEF = -1, 483 }; 484 } // anonymous namespace 485 486 /// Describes ident structure that describes a source location. 
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the runtime library entry points referenced by this file.
/// Each enumerator is documented with the prototype of the runtime function
/// it stands for.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  /// kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  /// size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  /// kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  /// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,
  /// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  /// *data);
  OMPRTL__kmpc_task_reduction_init,
  /// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  /// *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  /// Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  /// Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  /// Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  /// size);
  OMPRTL__kmpc_push_target_tripcount,
  /// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_nowait,
  /// Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  /// *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  /// Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  /// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  /// Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  /// Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  /// Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  /// Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  /// Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  /// *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
764 class CleanupTy final : public EHScopeStack::Cleanup { 765 PrePostActionTy *Action; 766 767 public: 768 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 769 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 770 if (!CGF.HaveInsertPoint()) 771 return; 772 Action->Exit(CGF); 773 } 774 }; 775 776 } // anonymous namespace 777 778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 779 CodeGenFunction::RunCleanupsScope Scope(CGF); 780 if (PrePostAction) { 781 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 782 Callback(CodeGen, CGF, *PrePostAction); 783 } else { 784 PrePostActionTy Action; 785 Callback(CodeGen, CGF, Action); 786 } 787 } 788 789 /// Check if the combiner is a call to UDR combiner and if it is so return the 790 /// UDR decl used for reduction. 791 static const OMPDeclareReductionDecl * 792 getReductionInit(const Expr *ReductionOp) { 793 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 794 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 795 if (const auto *DRE = 796 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 797 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 798 return DRD; 799 return nullptr; 800 } 801 802 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 803 const OMPDeclareReductionDecl *DRD, 804 const Expr *InitOp, 805 Address Private, Address Original, 806 QualType Ty) { 807 if (DRD->getInitializer()) { 808 std::pair<llvm::Function *, llvm::Function *> Reduction = 809 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 810 const auto *CE = cast<CallExpr>(InitOp); 811 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 812 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 813 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 814 const auto *LHSDRE = 815 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 816 const auto *RHSDRE = 817 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 818 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 819 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 820 [=]() { return Private; }); 821 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 822 [=]() { return Original; }); 823 (void)PrivateScope.Privatize(); 824 RValue Func = RValue::get(Reduction.second); 825 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 826 CGF.EmitIgnoredExpr(InitOp); 827 } else { 828 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 829 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 830 auto *GV = new llvm::GlobalVariable( 831 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 832 llvm::GlobalValue::PrivateLinkage, Init, Name); 833 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 834 RValue InitRVal; 835 switch (CGF.getEvaluationKind(Ty)) { 836 case TEK_Scalar: 837 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 838 break; 839 case TEK_Complex: 840 InitRVal = 841 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 842 break; 843 case TEK_Aggregate: 844 InitRVal = RValue::getAggregate(LV.getAddress()); 845 break; 846 } 847 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 848 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 849 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 850 /*IsInitializer=*/false); 851 } 852 } 853 854 /// Emit initialization of arrays of complex types. 855 /// \param DestAddr Address of the array. 856 /// \param Type Type of array. 857 /// \param Init Initial expression of array. 858 /// \param SrcAddr Address of the original array. 859 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 860 QualType Type, bool EmitDeclareReductionInit, 861 const Expr *Init, 862 const OMPDeclareReductionDecl *DRD, 863 Address SrcAddr = Address::invalid()) { 864 // Perform element-by-element initialization. 
865 QualType ElementTy; 866 867 // Drill down to the base element type on both arrays. 868 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 869 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 870 DestAddr = 871 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 872 if (DRD) 873 SrcAddr = 874 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 875 876 llvm::Value *SrcBegin = nullptr; 877 if (DRD) 878 SrcBegin = SrcAddr.getPointer(); 879 llvm::Value *DestBegin = DestAddr.getPointer(); 880 // Cast from pointer to array type to pointer to single element. 881 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 882 // The basic structure here is a while-do loop. 883 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 884 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 885 llvm::Value *IsEmpty = 886 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 887 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 888 889 // Enter the loop body, making that address the current address. 
890 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 891 CGF.EmitBlock(BodyBB); 892 893 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 894 895 llvm::PHINode *SrcElementPHI = nullptr; 896 Address SrcElementCurrent = Address::invalid(); 897 if (DRD) { 898 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 899 "omp.arraycpy.srcElementPast"); 900 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 901 SrcElementCurrent = 902 Address(SrcElementPHI, 903 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 904 } 905 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 906 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 907 DestElementPHI->addIncoming(DestBegin, EntryBB); 908 Address DestElementCurrent = 909 Address(DestElementPHI, 910 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 911 912 // Emit copy. 913 { 914 CodeGenFunction::RunCleanupsScope InitScope(CGF); 915 if (EmitDeclareReductionInit) { 916 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 917 SrcElementCurrent, ElementTy); 918 } else 919 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 920 /*IsInitializer=*/false); 921 } 922 923 if (DRD) { 924 // Shift the address forward by one element. 925 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 926 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 927 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 928 } 929 930 // Shift the address forward by one element. 931 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 932 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 933 // Check whether we've reached the end. 934 llvm::Value *Done = 935 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 936 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 937 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 938 939 // Done. 
940 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 941 } 942 943 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 944 return CGF.EmitOMPSharedLValue(E); 945 } 946 947 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 948 const Expr *E) { 949 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 950 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 951 return LValue(); 952 } 953 954 void ReductionCodeGen::emitAggregateInitialization( 955 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 956 const OMPDeclareReductionDecl *DRD) { 957 // Emit VarDecl with copy init for arrays. 958 // Get the address of the original variable captured in current 959 // captured region. 960 const auto *PrivateVD = 961 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 962 bool EmitDeclareReductionInit = 963 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 964 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 965 EmitDeclareReductionInit, 966 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 967 : PrivateVD->getInit(), 968 DRD, SharedLVal.getAddress()); 969 } 970 971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 972 ArrayRef<const Expr *> Privates, 973 ArrayRef<const Expr *> ReductionOps) { 974 ClausesData.reserve(Shareds.size()); 975 SharedAddresses.reserve(Shareds.size()); 976 Sizes.reserve(Shareds.size()); 977 BaseDecls.reserve(Shareds.size()); 978 auto IPriv = Privates.begin(); 979 auto IRed = ReductionOps.begin(); 980 for (const Expr *Ref : Shareds) { 981 ClausesData.emplace_back(Ref, *IPriv, *IRed); 982 std::advance(IPriv, 1); 983 std::advance(IRed, 1); 984 } 985 } 986 987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 988 assert(SharedAddresses.size() == N && 989 "Number of generated lvalues must be exactly N."); 990 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 991 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 992 SharedAddresses.emplace_back(First, Second); 993 } 994 995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 996 const auto *PrivateVD = 997 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 998 QualType PrivateType = PrivateVD->getType(); 999 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1000 if (!PrivateType->isVariablyModifiedType()) { 1001 Sizes.emplace_back( 1002 CGF.getTypeSize( 1003 SharedAddresses[N].first.getType().getNonReferenceType()), 1004 nullptr); 1005 return; 1006 } 1007 llvm::Value *Size; 1008 llvm::Value *SizeInChars; 1009 auto *ElemType = 1010 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 1011 ->getElementType(); 1012 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1013 if (AsArraySection) { 1014 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 1015 SharedAddresses[N].first.getPointer()); 1016 Size = CGF.Builder.CreateNUWAdd( 1017 Size, llvm::ConstantInt::get(Size->getType(), 
/*V=*/1)); 1018 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1019 } else { 1020 SizeInChars = CGF.getTypeSize( 1021 SharedAddresses[N].first.getType().getNonReferenceType()); 1022 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1023 } 1024 Sizes.emplace_back(SizeInChars, Size); 1025 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1026 CGF, 1027 cast<OpaqueValueExpr>( 1028 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1029 RValue::get(Size)); 1030 CGF.EmitVariablyModifiedType(PrivateType); 1031 } 1032 1033 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1034 llvm::Value *Size) { 1035 const auto *PrivateVD = 1036 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1037 QualType PrivateType = PrivateVD->getType(); 1038 if (!PrivateType->isVariablyModifiedType()) { 1039 assert(!Size && !Sizes[N].second && 1040 "Size should be nullptr for non-variably modified reduction " 1041 "items."); 1042 return; 1043 } 1044 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1045 CGF, 1046 cast<OpaqueValueExpr>( 1047 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1048 RValue::get(Size)); 1049 CGF.EmitVariablyModifiedType(PrivateType); 1050 } 1051 1052 void ReductionCodeGen::emitInitialization( 1053 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1054 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1055 assert(SharedAddresses.size() > N && "No variable was generated"); 1056 const auto *PrivateVD = 1057 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1058 const OMPDeclareReductionDecl *DRD = 1059 getReductionInit(ClausesData[N].ReductionOp); 1060 QualType PrivateType = PrivateVD->getType(); 1061 PrivateAddr = CGF.Builder.CreateElementBitCast( 1062 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1063 QualType SharedType = SharedAddresses[N].first.getType(); 1064 SharedLVal = CGF.MakeAddrLValue( 1065 
CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1066 CGF.ConvertTypeForMem(SharedType)), 1067 SharedType, SharedAddresses[N].first.getBaseInfo(), 1068 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1069 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1070 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1071 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1072 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1073 PrivateAddr, SharedLVal.getAddress(), 1074 SharedLVal.getType()); 1075 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1076 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1077 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1078 PrivateVD->getType().getQualifiers(), 1079 /*IsInitializer=*/false); 1080 } 1081 } 1082 1083 bool ReductionCodeGen::needCleanups(unsigned N) { 1084 const auto *PrivateVD = 1085 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1086 QualType PrivateType = PrivateVD->getType(); 1087 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1088 return DTorKind != QualType::DK_none; 1089 } 1090 1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1092 Address PrivateAddr) { 1093 const auto *PrivateVD = 1094 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1095 QualType PrivateType = PrivateVD->getType(); 1096 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1097 if (needCleanups(N)) { 1098 PrivateAddr = CGF.Builder.CreateElementBitCast( 1099 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1100 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1101 } 1102 } 1103 1104 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1105 LValue BaseLV) { 1106 BaseTy = BaseTy.getNonReferenceType(); 1107 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1108 
!CGF.getContext().hasSameType(BaseTy, ElTy)) { 1109 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1110 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1111 } else { 1112 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1113 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1114 } 1115 BaseTy = BaseTy->getPointeeType(); 1116 } 1117 return CGF.MakeAddrLValue( 1118 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1119 CGF.ConvertTypeForMem(ElTy)), 1120 BaseLV.getType(), BaseLV.getBaseInfo(), 1121 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1122 } 1123 1124 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1125 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1126 llvm::Value *Addr) { 1127 Address Tmp = Address::invalid(); 1128 Address TopTmp = Address::invalid(); 1129 Address MostTopTmp = Address::invalid(); 1130 BaseTy = BaseTy.getNonReferenceType(); 1131 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1132 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1133 Tmp = CGF.CreateMemTemp(BaseTy); 1134 if (TopTmp.isValid()) 1135 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1136 else 1137 MostTopTmp = Tmp; 1138 TopTmp = Tmp; 1139 BaseTy = BaseTy->getPointeeType(); 1140 } 1141 llvm::Type *Ty = BaseLVType; 1142 if (Tmp.isValid()) 1143 Ty = Tmp.getElementType(); 1144 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1145 if (Tmp.isValid()) { 1146 CGF.Builder.CreateStore(Addr, Tmp); 1147 return MostTopTmp; 1148 } 1149 return Address(Addr, BaseLVAlignment); 1150 } 1151 1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1153 const VarDecl *OrigVD = nullptr; 1154 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1155 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1156 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1157 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1158 
while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1159 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1160 DE = cast<DeclRefExpr>(Base); 1161 OrigVD = cast<VarDecl>(DE->getDecl()); 1162 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1163 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1164 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1165 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1166 DE = cast<DeclRefExpr>(Base); 1167 OrigVD = cast<VarDecl>(DE->getDecl()); 1168 } 1169 return OrigVD; 1170 } 1171 1172 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1173 Address PrivateAddr) { 1174 const DeclRefExpr *DE; 1175 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1176 BaseDecls.emplace_back(OrigVD); 1177 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1178 LValue BaseLValue = 1179 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1180 OriginalBaseLValue); 1181 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1182 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1183 llvm::Value *PrivatePointer = 1184 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1185 PrivateAddr.getPointer(), 1186 SharedAddresses[N].first.getAddress().getType()); 1187 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1188 return castToBase(CGF, OrigVD->getType(), 1189 SharedAddresses[N].first.getType(), 1190 OriginalBaseLValue.getAddress().getType(), 1191 OriginalBaseLValue.getAlignment(), Ptr); 1192 } 1193 BaseDecls.emplace_back( 1194 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1195 return PrivateAddr; 1196 } 1197 1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1199 const OMPDeclareReductionDecl *DRD = 1200 getReductionInit(ClausesData[N].ReductionOp); 1201 return DRD && DRD->getInitializer(); 1202 } 1203 1204 LValue 
CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1205 return CGF.EmitLoadOfPointerLValue( 1206 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1207 getThreadIDVariable()->getType()->castAs<PointerType>()); 1208 } 1209 1210 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1211 if (!CGF.HaveInsertPoint()) 1212 return; 1213 // 1.2.2 OpenMP Language Terminology 1214 // Structured block - An executable statement with a single entry at the 1215 // top and a single exit at the bottom. 1216 // The point of exit cannot be a branch out of the structured block. 1217 // longjmp() and throw() must not violate the entry/exit criteria. 1218 CGF.EHStack.pushTerminate(); 1219 CodeGen(CGF); 1220 CGF.EHStack.popTerminate(); 1221 } 1222 1223 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1224 CodeGenFunction &CGF) { 1225 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1226 getThreadIDVariable()->getType(), 1227 AlignmentSource::Decl); 1228 } 1229 1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1231 QualType FieldTy) { 1232 auto *Field = FieldDecl::Create( 1233 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1234 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1235 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1236 Field->setAccess(AS_public); 1237 DC->addDecl(Field); 1238 return Field; 1239 } 1240 1241 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1242 StringRef Separator) 1243 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1244 OffloadEntriesInfoManager(CGM) { 1245 ASTContext &C = CGM.getContext(); 1246 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1247 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1248 RD->startDefinition(); 1249 // reserved_1 1250 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1251 // flags 1252 addFieldToRecordDecl(C, RD, 
KmpInt32Ty); 1253 // reserved_2 1254 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1255 // reserved_3 1256 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1257 // psource 1258 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1259 RD->completeDefinition(); 1260 IdentQTy = C.getRecordType(RD); 1261 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1262 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1263 1264 loadOffloadInfoMetadata(); 1265 } 1266 1267 void CGOpenMPRuntime::clear() { 1268 InternalVars.clear(); 1269 // Clean non-target variable declarations possibly used only in debug info. 1270 for (const auto &Data : EmittedNonTargetVariables) { 1271 if (!Data.getValue().pointsToAliveValue()) 1272 continue; 1273 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1274 if (!GV) 1275 continue; 1276 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1277 continue; 1278 GV->eraseFromParent(); 1279 } 1280 } 1281 1282 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1283 SmallString<128> Buffer; 1284 llvm::raw_svector_ostream OS(Buffer); 1285 StringRef Sep = FirstSeparator; 1286 for (StringRef Part : Parts) { 1287 OS << Sep << Part; 1288 Sep = Separator; 1289 } 1290 return OS.str(); 1291 } 1292 1293 static llvm::Function * 1294 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1295 const Expr *CombinerInitializer, const VarDecl *In, 1296 const VarDecl *Out, bool IsCombiner) { 1297 // void .omp_combiner.(Ty *in, Ty *out); 1298 ASTContext &C = CGM.getContext(); 1299 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1300 FunctionArgList Args; 1301 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1302 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1303 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1304 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1305 Args.push_back(&OmpOutParm); 1306 Args.push_back(&OmpInParm); 1307 const CGFunctionInfo &FnInfo = 1308 
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1309 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1310 std::string Name = CGM.getOpenMPRuntime().getName( 1311 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1312 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1313 Name, &CGM.getModule()); 1314 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1315 if (CGM.getLangOpts().Optimize) { 1316 Fn->removeFnAttr(llvm::Attribute::NoInline); 1317 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1318 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1319 } 1320 CodeGenFunction CGF(CGM); 1321 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1322 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 1323 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1324 Out->getLocation()); 1325 CodeGenFunction::OMPPrivateScope Scope(CGF); 1326 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1327 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1328 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1329 .getAddress(); 1330 }); 1331 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1332 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1333 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1334 .getAddress(); 1335 }); 1336 (void)Scope.Privatize(); 1337 if (!IsCombiner && Out->hasInit() && 1338 !CGF.isTrivialInitializer(Out->getInit())) { 1339 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1340 Out->getType().getQualifiers(), 1341 /*IsInitializer=*/true); 1342 } 1343 if (CombinerInitializer) 1344 CGF.EmitIgnoredExpr(CombinerInitializer); 1345 Scope.ForceCleanup(); 1346 CGF.FinishFunction(); 1347 return Fn; 1348 } 1349 1350 void CGOpenMPRuntime::emitUserDefinedReduction( 1351 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1352 if (UDRMap.count(D) > 0) 1353 
return; 1354 llvm::Function *Combiner = emitCombinerOrInitializer( 1355 CGM, D->getType(), D->getCombiner(), 1356 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1357 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1358 /*IsCombiner=*/true); 1359 llvm::Function *Initializer = nullptr; 1360 if (const Expr *Init = D->getInitializer()) { 1361 Initializer = emitCombinerOrInitializer( 1362 CGM, D->getType(), 1363 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init 1364 : nullptr, 1365 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1366 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1367 /*IsCombiner=*/false); 1368 } 1369 UDRMap.try_emplace(D, Combiner, Initializer); 1370 if (CGF) { 1371 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1372 Decls.second.push_back(D); 1373 } 1374 } 1375 1376 std::pair<llvm::Function *, llvm::Function *> 1377 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1378 auto I = UDRMap.find(D); 1379 if (I != UDRMap.end()) 1380 return I->second; 1381 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1382 return UDRMap.lookup(D); 1383 } 1384 1385 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1386 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1387 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1388 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1389 assert(ThreadIDVar->getType()->isPointerType() && 1390 "thread id variable must be of type kmp_int32 *"); 1391 CodeGenFunction CGF(CGM, true); 1392 bool HasCancel = false; 1393 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1394 HasCancel = OPD->hasCancel(); 1395 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1396 HasCancel = OPSD->hasCancel(); 1397 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1398 HasCancel = OPFD->hasCancel(); 1399 else if (const 
auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1400 HasCancel = OPFD->hasCancel(); 1401 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1402 HasCancel = OPFD->hasCancel(); 1403 else if (const auto *OPFD = 1404 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1405 HasCancel = OPFD->hasCancel(); 1406 else if (const auto *OPFD = 1407 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1408 HasCancel = OPFD->hasCancel(); 1409 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1410 HasCancel, OutlinedHelperName); 1411 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1412 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1413 } 1414 1415 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1416 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1417 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1418 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1419 return emitParallelOrTeamsOutlinedFunction( 1420 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1421 } 1422 1423 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1424 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1425 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1426 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1427 return emitParallelOrTeamsOutlinedFunction( 1428 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1429 } 1430 1431 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1432 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1433 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1434 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1435 bool Tied, unsigned &NumberOfParts) { 1436 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1437 PrePostActionTy &) { 1438 llvm::Value *ThreadID = getThreadID(CGF, 
D.getBeginLoc()); 1439 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1440 llvm::Value *TaskArgs[] = { 1441 UpLoc, ThreadID, 1442 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1443 TaskTVar->getType()->castAs<PointerType>()) 1444 .getPointer()}; 1445 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1446 }; 1447 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1448 UntiedCodeGen); 1449 CodeGen.setAction(Action); 1450 assert(!ThreadIDVar->getType()->isPointerType() && 1451 "thread id variable must be of type kmp_int32 for tasks"); 1452 const OpenMPDirectiveKind Region = 1453 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1454 : OMPD_task; 1455 const CapturedStmt *CS = D.getCapturedStmt(Region); 1456 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1457 CodeGenFunction CGF(CGM, true); 1458 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1459 InnermostKind, 1460 TD ? TD->hasCancel() : false, Action); 1461 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1462 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1463 if (!Tied) 1464 NumberOfParts = Action.getNumberOfParts(); 1465 return Res; 1466 } 1467 1468 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1469 const RecordDecl *RD, const CGRecordLayout &RL, 1470 ArrayRef<llvm::Constant *> Data) { 1471 llvm::StructType *StructTy = RL.getLLVMType(); 1472 unsigned PrevIdx = 0; 1473 ConstantInitBuilder CIBuilder(CGM); 1474 auto DI = Data.begin(); 1475 for (const FieldDecl *FD : RD->fields()) { 1476 unsigned Idx = RL.getLLVMFieldNo(FD); 1477 // Fill the alignment. 1478 for (unsigned I = PrevIdx; I < Idx; ++I) 1479 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1480 PrevIdx = Idx + 1; 1481 Fields.add(*DI); 1482 ++DI; 1483 } 1484 } 1485 1486 template <class... 
As> 1487 static llvm::GlobalVariable * 1488 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1489 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1490 As &&... Args) { 1491 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1492 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1493 ConstantInitBuilder CIBuilder(CGM); 1494 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1495 buildStructValue(Fields, CGM, RD, RL, Data); 1496 return Fields.finishAndCreateGlobal( 1497 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1498 std::forward<As>(Args)...); 1499 } 1500 1501 template <typename T> 1502 static void 1503 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1504 ArrayRef<llvm::Constant *> Data, 1505 T &Parent) { 1506 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1507 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1508 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1509 buildStructValue(Fields, CGM, RD, RL, Data); 1510 Fields.finishAndAddTo(Parent); 1511 } 1512 1513 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1514 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1515 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1516 FlagsTy FlagsKey(Flags, Reserved2Flags); 1517 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1518 if (!Entry) { 1519 if (!DefaultOpenMPPSource) { 1520 // Initialize default location for psource field of ident_t structure of 1521 // all ident_t objects. Format is ";file;function;line;column;;". 
1522 // Taken from 1523 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1524 DefaultOpenMPPSource = 1525 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1526 DefaultOpenMPPSource = 1527 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1528 } 1529 1530 llvm::Constant *Data[] = { 1531 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1532 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1533 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1534 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1535 llvm::GlobalValue *DefaultOpenMPLocation = 1536 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1537 llvm::GlobalValue::PrivateLinkage); 1538 DefaultOpenMPLocation->setUnnamedAddr( 1539 llvm::GlobalValue::UnnamedAddr::Global); 1540 1541 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1542 } 1543 return Address(Entry, Align); 1544 } 1545 1546 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1547 bool AtCurrentPoint) { 1548 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1549 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1550 1551 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1552 if (AtCurrentPoint) { 1553 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1554 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1555 } else { 1556 Elem.second.ServiceInsertPt = 1557 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1558 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1559 } 1560 } 1561 1562 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1563 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1564 if (Elem.second.ServiceInsertPt) { 1565 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1566 Elem.second.ServiceInsertPt = nullptr; 1567 Ptr->eraseFromParent(); 1568 } 1569 } 1570 1571 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1572 SourceLocation Loc, 1573 unsigned Flags) { 1574 Flags |= OMP_IDENT_KMPC; 1575 // If no debug info is generated - return global default location. 1576 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1577 Loc.isInvalid()) 1578 return getOrCreateDefaultLocation(Flags).getPointer(); 1579 1580 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1581 1582 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1583 Address LocValue = Address::invalid(); 1584 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1585 if (I != OpenMPLocThreadIDMap.end()) 1586 LocValue = Address(I->second.DebugLoc, Align); 1587 1588 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1589 // GetOpenMPThreadID was called before this routine. 1590 if (!LocValue.isValid()) { 1591 // Generate "ident_t .kmpc_loc.addr;" 1592 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1593 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1594 Elem.second.DebugLoc = AI.getPointer(); 1595 LocValue = AI; 1596 1597 if (!Elem.second.ServiceInsertPt) 1598 setLocThreadIdInsertPt(CGF); 1599 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1600 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1601 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1602 CGF.getTypeSize(IdentQTy)); 1603 } 1604 1605 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1606 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1607 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1608 LValue PSource = 1609 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1610 1611 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1612 if (OMPDebugLoc == nullptr) { 1613 SmallString<128> Buffer2; 1614 llvm::raw_svector_ostream OS2(Buffer2); 1615 // Build debug location 1616 PresumedLoc PLoc = 
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1617 OS2 << ";" << PLoc.getFilename() << ";"; 1618 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1619 OS2 << FD->getQualifiedNameAsString(); 1620 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1621 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1622 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1623 } 1624 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1625 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1626 1627 // Our callers always pass this to a runtime function, so for 1628 // convenience, go ahead and return a naked pointer. 1629 return LocValue.getPointer(); 1630 } 1631 1632 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1633 SourceLocation Loc) { 1634 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1635 1636 llvm::Value *ThreadID = nullptr; 1637 // Check whether we've already cached a load of the thread id in this 1638 // function. 1639 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1640 if (I != OpenMPLocThreadIDMap.end()) { 1641 ThreadID = I->second.ThreadID; 1642 if (ThreadID != nullptr) 1643 return ThreadID; 1644 } 1645 // If exceptions are enabled, do not use parameter to avoid possible crash. 1646 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1647 !CGF.getLangOpts().CXXExceptions || 1648 CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1649 if (auto *OMPRegionInfo = 1650 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1651 if (OMPRegionInfo->getThreadIDVariable()) { 1652 // Check if this an outlined function with thread id passed as argument. 1653 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1654 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1655 // If value loaded in entry block, cache it and use it everywhere in 1656 // function. 
1657 if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) { 1658 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1659 Elem.second.ThreadID = ThreadID; 1660 } 1661 return ThreadID; 1662 } 1663 } 1664 } 1665 1666 // This is not an outlined function region - need to call __kmpc_int32 1667 // kmpc_global_thread_num(ident_t *loc). 1668 // Generate thread id value and cache this value for use across the 1669 // function. 1670 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1671 if (!Elem.second.ServiceInsertPt) 1672 setLocThreadIdInsertPt(CGF); 1673 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1674 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1675 llvm::CallInst *Call = CGF.Builder.CreateCall( 1676 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1677 emitUpdateLocation(CGF, Loc)); 1678 Call->setCallingConv(CGF.getRuntimeCC()); 1679 Elem.second.ThreadID = Call; 1680 return Call; 1681 } 1682 1683 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1684 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1685 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1686 clearLocThreadIdInsertPt(CGF); 1687 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1688 } 1689 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1690 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1691 UDRMap.erase(D); 1692 FunctionUDRMap.erase(CGF.CurFn); 1693 } 1694 auto I = FunctionUDMMap.find(CGF.CurFn); 1695 if (I != FunctionUDMMap.end()) { 1696 for(auto *D : I->second) 1697 UDMMap.erase(D); 1698 FunctionUDMMap.erase(I); 1699 } 1700 } 1701 1702 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1703 return IdentTy->getPointerTo(); 1704 } 1705 1706 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1707 if (!Kmpc_MicroTy) { 1708 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1709 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1710 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1711 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1712 } 1713 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1714 } 1715 1716 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1717 llvm::FunctionCallee RTLFn = nullptr; 1718 switch (static_cast<OpenMPRTLFunction>(Function)) { 1719 case OMPRTL__kmpc_fork_call: { 1720 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1721 // microtask, ...); 1722 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1723 getKmpc_MicroPointerTy()}; 1724 auto *FnTy = 1725 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1726 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1727 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1728 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1729 llvm::LLVMContext &Ctx = F->getContext(); 1730 llvm::MDBuilder MDB(Ctx); 1731 // Annotate the callback behavior of the __kmpc_fork_call: 1732 // - The callback callee is argument number 2 (microtask). 1733 // - The first two arguments of the callback callee are unknown (-1). 1734 // - All variadic arguments to the __kmpc_fork_call are passed to the 1735 // callback callee. 
1736 F->addMetadata( 1737 llvm::LLVMContext::MD_callback, 1738 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1739 2, {-1, -1}, 1740 /* VarArgsArePassed */ true)})); 1741 } 1742 } 1743 break; 1744 } 1745 case OMPRTL__kmpc_global_thread_num: { 1746 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1747 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1748 auto *FnTy = 1749 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1750 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1751 break; 1752 } 1753 case OMPRTL__kmpc_threadprivate_cached: { 1754 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1755 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1756 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1757 CGM.VoidPtrTy, CGM.SizeTy, 1758 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1759 auto *FnTy = 1760 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1761 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1762 break; 1763 } 1764 case OMPRTL__kmpc_critical: { 1765 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1766 // kmp_critical_name *crit); 1767 llvm::Type *TypeParams[] = { 1768 getIdentTyPointerTy(), CGM.Int32Ty, 1769 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1770 auto *FnTy = 1771 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1772 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1773 break; 1774 } 1775 case OMPRTL__kmpc_critical_with_hint: { 1776 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1777 // kmp_critical_name *crit, uintptr_t hint); 1778 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1779 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1780 CGM.IntPtrTy}; 1781 auto *FnTy = 1782 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1783 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1784 break; 1785 } 1786 case OMPRTL__kmpc_threadprivate_register: { 1787 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1788 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1789 // typedef void *(*kmpc_ctor)(void *); 1790 auto *KmpcCtorTy = 1791 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1792 /*isVarArg*/ false)->getPointerTo(); 1793 // typedef void *(*kmpc_cctor)(void *, void *); 1794 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1795 auto *KmpcCopyCtorTy = 1796 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1797 /*isVarArg*/ false) 1798 ->getPointerTo(); 1799 // typedef void (*kmpc_dtor)(void *); 1800 auto *KmpcDtorTy = 1801 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1802 ->getPointerTo(); 1803 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1804 KmpcCopyCtorTy, KmpcDtorTy}; 1805 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1806 /*isVarArg*/ false); 1807 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1808 break; 1809 } 1810 case OMPRTL__kmpc_end_critical: { 1811 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1812 // kmp_critical_name *crit); 1813 llvm::Type *TypeParams[] = { 1814 getIdentTyPointerTy(), CGM.Int32Ty, 1815 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1816 auto *FnTy = 1817 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1818 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1819 break; 1820 } 1821 case OMPRTL__kmpc_cancel_barrier: { 1822 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1823 // global_tid); 1824 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1825 auto *FnTy = 1826 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1827 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1828 break; 1829 } 1830 case 
OMPRTL__kmpc_barrier: { 1831 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1832 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1833 auto *FnTy = 1834 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1835 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1836 break; 1837 } 1838 case OMPRTL__kmpc_for_static_fini: { 1839 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1840 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1841 auto *FnTy = 1842 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1843 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1844 break; 1845 } 1846 case OMPRTL__kmpc_push_num_threads: { 1847 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1848 // kmp_int32 num_threads) 1849 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1850 CGM.Int32Ty}; 1851 auto *FnTy = 1852 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1853 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1854 break; 1855 } 1856 case OMPRTL__kmpc_serialized_parallel: { 1857 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1858 // global_tid); 1859 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1860 auto *FnTy = 1861 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1862 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1863 break; 1864 } 1865 case OMPRTL__kmpc_end_serialized_parallel: { 1866 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1867 // global_tid); 1868 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1869 auto *FnTy = 1870 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1871 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1872 break; 1873 } 1874 case OMPRTL__kmpc_flush: { 1875 // Build void 
__kmpc_flush(ident_t *loc); 1876 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1877 auto *FnTy = 1878 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1879 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1880 break; 1881 } 1882 case OMPRTL__kmpc_master: { 1883 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1884 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1885 auto *FnTy = 1886 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1887 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1888 break; 1889 } 1890 case OMPRTL__kmpc_end_master: { 1891 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1892 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1893 auto *FnTy = 1894 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1895 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1896 break; 1897 } 1898 case OMPRTL__kmpc_omp_taskyield: { 1899 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1900 // int end_part); 1901 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1902 auto *FnTy = 1903 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1904 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1905 break; 1906 } 1907 case OMPRTL__kmpc_single: { 1908 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1909 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1910 auto *FnTy = 1911 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1912 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_end_single: { 1916 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1917 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1918 auto *FnTy = 1919 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1920 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1921 break; 1922 } 1923 case OMPRTL__kmpc_omp_task_alloc: { 1924 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1925 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1926 // kmp_routine_entry_t *task_entry); 1927 assert(KmpRoutineEntryPtrTy != nullptr && 1928 "Type kmp_routine_entry_t must be created."); 1929 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1930 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1931 // Return void * and then cast to particular kmp_task_t type. 1932 auto *FnTy = 1933 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1934 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1935 break; 1936 } 1937 case OMPRTL__kmpc_omp_target_task_alloc: { 1938 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 1939 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1940 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 1941 assert(KmpRoutineEntryPtrTy != nullptr && 1942 "Type kmp_routine_entry_t must be created."); 1943 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1944 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 1945 CGM.Int64Ty}; 1946 // Return void * and then cast to particular kmp_task_t type. 
1947 auto *FnTy = 1948 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1949 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 1950 break; 1951 } 1952 case OMPRTL__kmpc_omp_task: { 1953 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 1954 // *new_task); 1955 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1956 CGM.VoidPtrTy}; 1957 auto *FnTy = 1958 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1959 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 1960 break; 1961 } 1962 case OMPRTL__kmpc_copyprivate: { 1963 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 1964 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 1965 // kmp_int32 didit); 1966 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1967 auto *CpyFnTy = 1968 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 1969 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 1970 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 1971 CGM.Int32Ty}; 1972 auto *FnTy = 1973 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1974 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 1975 break; 1976 } 1977 case OMPRTL__kmpc_reduce: { 1978 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 1979 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 1980 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 1981 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1982 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 1983 /*isVarArg=*/false); 1984 llvm::Type *TypeParams[] = { 1985 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 1986 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 1987 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1988 auto *FnTy = 1989 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1990 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 1991 break; 1992 } 1993 case OMPRTL__kmpc_reduce_nowait: { 1994 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 1995 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 1996 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 1997 // *lck); 1998 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1999 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2000 /*isVarArg=*/false); 2001 llvm::Type *TypeParams[] = { 2002 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2003 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2004 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2005 auto *FnTy = 2006 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2007 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2008 break; 2009 } 2010 case OMPRTL__kmpc_end_reduce: { 2011 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2012 // kmp_critical_name *lck); 2013 llvm::Type *TypeParams[] = { 2014 getIdentTyPointerTy(), CGM.Int32Ty, 2015 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2016 auto *FnTy = 2017 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2018 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2019 break; 2020 } 2021 case OMPRTL__kmpc_end_reduce_nowait: { 2022 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2023 // kmp_critical_name *lck); 2024 llvm::Type *TypeParams[] = { 2025 getIdentTyPointerTy(), CGM.Int32Ty, 2026 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2027 auto *FnTy = 2028 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2029 RTLFn = 2030 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2031 break; 2032 } 2033 case OMPRTL__kmpc_omp_task_begin_if0: { 
2034 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2035 // *new_task); 2036 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2037 CGM.VoidPtrTy}; 2038 auto *FnTy = 2039 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2040 RTLFn = 2041 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2042 break; 2043 } 2044 case OMPRTL__kmpc_omp_task_complete_if0: { 2045 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2046 // *new_task); 2047 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2048 CGM.VoidPtrTy}; 2049 auto *FnTy = 2050 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2051 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2052 /*Name=*/"__kmpc_omp_task_complete_if0"); 2053 break; 2054 } 2055 case OMPRTL__kmpc_ordered: { 2056 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2057 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2058 auto *FnTy = 2059 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2060 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2061 break; 2062 } 2063 case OMPRTL__kmpc_end_ordered: { 2064 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2065 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2066 auto *FnTy = 2067 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2068 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2069 break; 2070 } 2071 case OMPRTL__kmpc_omp_taskwait: { 2072 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2073 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2074 auto *FnTy = 2075 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2076 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2077 break; 2078 } 2079 case OMPRTL__kmpc_taskgroup: { 2080 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2081 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2082 auto *FnTy = 2083 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2084 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2085 break; 2086 } 2087 case OMPRTL__kmpc_end_taskgroup: { 2088 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2089 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2090 auto *FnTy = 2091 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2092 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2093 break; 2094 } 2095 case OMPRTL__kmpc_push_proc_bind: { 2096 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2097 // int proc_bind) 2098 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2099 auto *FnTy = 2100 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2101 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2102 break; 2103 } 2104 case OMPRTL__kmpc_omp_task_with_deps: { 2105 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2106 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2107 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2108 llvm::Type *TypeParams[] = { 2109 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2110 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2111 auto *FnTy = 2112 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2113 RTLFn = 2114 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2115 break; 2116 } 2117 case OMPRTL__kmpc_omp_wait_deps: { 2118 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2119 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2120 // kmp_depend_info_t *noalias_dep_list); 2121 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2122 CGM.Int32Ty, CGM.VoidPtrTy, 2123 
CGM.Int32Ty, CGM.VoidPtrTy}; 2124 auto *FnTy = 2125 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2126 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2127 break; 2128 } 2129 case OMPRTL__kmpc_cancellationpoint: { 2130 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2131 // global_tid, kmp_int32 cncl_kind) 2132 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2133 auto *FnTy = 2134 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2135 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2136 break; 2137 } 2138 case OMPRTL__kmpc_cancel: { 2139 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2140 // kmp_int32 cncl_kind) 2141 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2142 auto *FnTy = 2143 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2144 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2145 break; 2146 } 2147 case OMPRTL__kmpc_push_num_teams: { 2148 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2149 // kmp_int32 num_teams, kmp_int32 num_threads) 2150 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2151 CGM.Int32Ty}; 2152 auto *FnTy = 2153 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2154 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2155 break; 2156 } 2157 case OMPRTL__kmpc_fork_teams: { 2158 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2159 // microtask, ...); 2160 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2161 getKmpc_MicroPointerTy()}; 2162 auto *FnTy = 2163 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2164 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2165 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2166 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2167 llvm::LLVMContext &Ctx = F->getContext(); 2168 llvm::MDBuilder MDB(Ctx); 2169 // Annotate the callback behavior of the __kmpc_fork_teams: 2170 // - The callback callee is argument number 2 (microtask). 2171 // - The first two arguments of the callback callee are unknown (-1). 2172 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2173 // callback callee. 2174 F->addMetadata( 2175 llvm::LLVMContext::MD_callback, 2176 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2177 2, {-1, -1}, 2178 /* VarArgsArePassed */ true)})); 2179 } 2180 } 2181 break; 2182 } 2183 case OMPRTL__kmpc_taskloop: { 2184 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2185 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2186 // sched, kmp_uint64 grainsize, void *task_dup); 2187 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2188 CGM.IntTy, 2189 CGM.VoidPtrTy, 2190 CGM.IntTy, 2191 CGM.Int64Ty->getPointerTo(), 2192 CGM.Int64Ty->getPointerTo(), 2193 CGM.Int64Ty, 2194 CGM.IntTy, 2195 CGM.IntTy, 2196 CGM.Int64Ty, 2197 CGM.VoidPtrTy}; 2198 auto *FnTy = 2199 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2200 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2201 break; 2202 } 2203 case OMPRTL__kmpc_doacross_init: { 2204 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2205 // num_dims, struct kmp_dim *dims); 2206 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2207 CGM.Int32Ty, 2208 CGM.Int32Ty, 2209 CGM.VoidPtrTy}; 2210 auto *FnTy = 2211 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2212 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2213 break; 2214 } 2215 case OMPRTL__kmpc_doacross_fini: { 2216 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2217 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2218 auto *FnTy 
= 2219 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2220 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2221 break; 2222 } 2223 case OMPRTL__kmpc_doacross_post: { 2224 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2225 // *vec); 2226 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2227 CGM.Int64Ty->getPointerTo()}; 2228 auto *FnTy = 2229 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2230 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2231 break; 2232 } 2233 case OMPRTL__kmpc_doacross_wait: { 2234 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2235 // *vec); 2236 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2237 CGM.Int64Ty->getPointerTo()}; 2238 auto *FnTy = 2239 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2240 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2241 break; 2242 } 2243 case OMPRTL__kmpc_task_reduction_init: { 2244 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2245 // *data); 2246 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2247 auto *FnTy = 2248 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2249 RTLFn = 2250 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2251 break; 2252 } 2253 case OMPRTL__kmpc_task_reduction_get_th_data: { 2254 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2255 // *d); 2256 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2257 auto *FnTy = 2258 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2259 RTLFn = CGM.CreateRuntimeFunction( 2260 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2261 break; 2262 } 2263 case OMPRTL__kmpc_alloc: { 2264 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2265 // 
al); omp_allocator_handle_t type is void *. 2266 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2267 auto *FnTy = 2268 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2269 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2270 break; 2271 } 2272 case OMPRTL__kmpc_free: { 2273 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2274 // al); omp_allocator_handle_t type is void *. 2275 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2276 auto *FnTy = 2277 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2278 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2279 break; 2280 } 2281 case OMPRTL__kmpc_push_target_tripcount: { 2282 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2283 // size); 2284 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2285 llvm::FunctionType *FnTy = 2286 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2287 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2288 break; 2289 } 2290 case OMPRTL__tgt_target: { 2291 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2292 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2293 // *arg_types); 2294 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2295 CGM.VoidPtrTy, 2296 CGM.Int32Ty, 2297 CGM.VoidPtrPtrTy, 2298 CGM.VoidPtrPtrTy, 2299 CGM.Int64Ty->getPointerTo(), 2300 CGM.Int64Ty->getPointerTo()}; 2301 auto *FnTy = 2302 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2303 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2304 break; 2305 } 2306 case OMPRTL__tgt_target_nowait: { 2307 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2308 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2309 // int64_t *arg_types); 2310 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2311 CGM.VoidPtrTy, 2312 
CGM.Int32Ty, 2313 CGM.VoidPtrPtrTy, 2314 CGM.VoidPtrPtrTy, 2315 CGM.Int64Ty->getPointerTo(), 2316 CGM.Int64Ty->getPointerTo()}; 2317 auto *FnTy = 2318 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2319 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2320 break; 2321 } 2322 case OMPRTL__tgt_target_teams: { 2323 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2324 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2325 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2326 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2327 CGM.VoidPtrTy, 2328 CGM.Int32Ty, 2329 CGM.VoidPtrPtrTy, 2330 CGM.VoidPtrPtrTy, 2331 CGM.Int64Ty->getPointerTo(), 2332 CGM.Int64Ty->getPointerTo(), 2333 CGM.Int32Ty, 2334 CGM.Int32Ty}; 2335 auto *FnTy = 2336 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2337 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2338 break; 2339 } 2340 case OMPRTL__tgt_target_teams_nowait: { 2341 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2342 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2343 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2344 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2345 CGM.VoidPtrTy, 2346 CGM.Int32Ty, 2347 CGM.VoidPtrPtrTy, 2348 CGM.VoidPtrPtrTy, 2349 CGM.Int64Ty->getPointerTo(), 2350 CGM.Int64Ty->getPointerTo(), 2351 CGM.Int32Ty, 2352 CGM.Int32Ty}; 2353 auto *FnTy = 2354 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2355 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2356 break; 2357 } 2358 case OMPRTL__tgt_register_requires: { 2359 // Build void __tgt_register_requires(int64_t flags); 2360 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2361 auto *FnTy = 2362 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2363 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2364 
break; 2365 } 2366 case OMPRTL__tgt_register_lib: { 2367 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2368 QualType ParamTy = 2369 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2370 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2371 auto *FnTy = 2372 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2373 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2374 break; 2375 } 2376 case OMPRTL__tgt_unregister_lib: { 2377 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2378 QualType ParamTy = 2379 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2380 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2381 auto *FnTy = 2382 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2383 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2384 break; 2385 } 2386 case OMPRTL__tgt_target_data_begin: { 2387 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2388 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2389 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2390 CGM.Int32Ty, 2391 CGM.VoidPtrPtrTy, 2392 CGM.VoidPtrPtrTy, 2393 CGM.Int64Ty->getPointerTo(), 2394 CGM.Int64Ty->getPointerTo()}; 2395 auto *FnTy = 2396 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2397 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2398 break; 2399 } 2400 case OMPRTL__tgt_target_data_begin_nowait: { 2401 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2402 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2403 // *arg_types); 2404 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2405 CGM.Int32Ty, 2406 CGM.VoidPtrPtrTy, 2407 CGM.VoidPtrPtrTy, 2408 CGM.Int64Ty->getPointerTo(), 2409 CGM.Int64Ty->getPointerTo()}; 2410 auto *FnTy = 2411 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2412 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2413 break; 2414 } 2415 case OMPRTL__tgt_target_data_end: { 2416 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2417 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2418 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2419 CGM.Int32Ty, 2420 CGM.VoidPtrPtrTy, 2421 CGM.VoidPtrPtrTy, 2422 CGM.Int64Ty->getPointerTo(), 2423 CGM.Int64Ty->getPointerTo()}; 2424 auto *FnTy = 2425 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2426 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2427 break; 2428 } 2429 case OMPRTL__tgt_target_data_end_nowait: { 2430 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2431 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2432 // *arg_types); 2433 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2434 CGM.Int32Ty, 2435 CGM.VoidPtrPtrTy, 2436 CGM.VoidPtrPtrTy, 2437 CGM.Int64Ty->getPointerTo(), 2438 CGM.Int64Ty->getPointerTo()}; 2439 auto *FnTy = 2440 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2441 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2442 break; 2443 } 2444 case OMPRTL__tgt_target_data_update: { 2445 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2446 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2447 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2448 CGM.Int32Ty, 2449 CGM.VoidPtrPtrTy, 2450 CGM.VoidPtrPtrTy, 2451 CGM.Int64Ty->getPointerTo(), 2452 CGM.Int64Ty->getPointerTo()}; 2453 auto *FnTy = 2454 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2455 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2456 break; 2457 } 2458 case OMPRTL__tgt_target_data_update_nowait: { 2459 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2460 // arg_num, void** args_base, void **args, int64_t *arg_sizes, 
int64_t 2461 // *arg_types); 2462 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2463 CGM.Int32Ty, 2464 CGM.VoidPtrPtrTy, 2465 CGM.VoidPtrPtrTy, 2466 CGM.Int64Ty->getPointerTo(), 2467 CGM.Int64Ty->getPointerTo()}; 2468 auto *FnTy = 2469 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2470 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2471 break; 2472 } 2473 case OMPRTL__tgt_mapper_num_components: { 2474 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2475 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2476 auto *FnTy = 2477 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2478 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2479 break; 2480 } 2481 case OMPRTL__tgt_push_mapper_component: { 2482 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2483 // *base, void *begin, int64_t size, int64_t type); 2484 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2485 CGM.Int64Ty, CGM.Int64Ty}; 2486 auto *FnTy = 2487 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2488 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2489 break; 2490 } 2491 } 2492 assert(RTLFn && "Unable to find OpenMP runtime function"); 2493 return RTLFn; 2494 } 2495 2496 llvm::FunctionCallee 2497 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2498 assert((IVSize == 32 || IVSize == 64) && 2499 "IV size is not compatible with the omp runtime"); 2500 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2501 : "__kmpc_for_static_init_4u") 2502 : (IVSigned ? "__kmpc_for_static_init_8" 2503 : "__kmpc_for_static_init_8u"); 2504 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2505 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2506 llvm::Type *TypeParams[] = { 2507 getIdentTyPointerTy(), // loc 2508 CGM.Int32Ty, // tid 2509 CGM.Int32Ty, // schedtype 2510 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2511 PtrTy, // p_lower 2512 PtrTy, // p_upper 2513 PtrTy, // p_stride 2514 ITy, // incr 2515 ITy // chunk 2516 }; 2517 auto *FnTy = 2518 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2519 return CGM.CreateRuntimeFunction(FnTy, Name); 2520 } 2521 2522 llvm::FunctionCallee 2523 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2524 assert((IVSize == 32 || IVSize == 64) && 2525 "IV size is not compatible with the omp runtime"); 2526 StringRef Name = 2527 IVSize == 32 2528 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2529 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2530 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2531 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2532 CGM.Int32Ty, // tid 2533 CGM.Int32Ty, // schedtype 2534 ITy, // lower 2535 ITy, // upper 2536 ITy, // stride 2537 ITy // chunk 2538 }; 2539 auto *FnTy = 2540 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2541 return CGM.CreateRuntimeFunction(FnTy, Name); 2542 } 2543 2544 llvm::FunctionCallee 2545 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2546 assert((IVSize == 32 || IVSize == 64) && 2547 "IV size is not compatible with the omp runtime"); 2548 StringRef Name = 2549 IVSize == 32 2550 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2551 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2552 llvm::Type *TypeParams[] = { 2553 getIdentTyPointerTy(), // loc 2554 CGM.Int32Ty, // tid 2555 }; 2556 auto *FnTy = 2557 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2558 return CGM.CreateRuntimeFunction(FnTy, Name); 2559 } 2560 2561 llvm::FunctionCallee 2562 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2563 assert((IVSize == 32 || IVSize == 64) && 2564 "IV size is not compatible with the omp runtime"); 2565 StringRef Name = 2566 IVSize == 32 2567 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2568 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2569 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2570 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2571 llvm::Type *TypeParams[] = { 2572 getIdentTyPointerTy(), // loc 2573 CGM.Int32Ty, // tid 2574 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2575 PtrTy, // p_lower 2576 PtrTy, // p_upper 2577 PtrTy // p_stride 2578 }; 2579 auto *FnTy = 2580 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2581 return CGM.CreateRuntimeFunction(FnTy, Name); 2582 } 2583 2584 /// Obtain information that uniquely identifies a target entry. This 2585 /// consists of the file and device IDs as well as line number associated with 2586 /// the relevant entry source location. 
2587 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2588 unsigned &DeviceID, unsigned &FileID, 2589 unsigned &LineNum) { 2590 SourceManager &SM = C.getSourceManager(); 2591 2592 // The loc should be always valid and have a file ID (the user cannot use 2593 // #pragma directives in macros) 2594 2595 assert(Loc.isValid() && "Source location is expected to be always valid."); 2596 2597 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2598 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2599 2600 llvm::sys::fs::UniqueID ID; 2601 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2602 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2603 << PLoc.getFilename() << EC.message(); 2604 2605 DeviceID = ID.getDevice(); 2606 FileID = ID.getFile(); 2607 LineNum = PLoc.getLine(); 2608 } 2609 2610 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2611 if (CGM.getLangOpts().OpenMPSimd) 2612 return Address::invalid(); 2613 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2614 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2615 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2616 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2617 HasRequiresUnifiedSharedMemory))) { 2618 SmallString<64> PtrName; 2619 { 2620 llvm::raw_svector_ostream OS(PtrName); 2621 OS << CGM.getMangledName(GlobalDecl(VD)); 2622 if (!VD->isExternallyVisible()) { 2623 unsigned DeviceID, FileID, Line; 2624 getTargetEntryUniqueInfo(CGM.getContext(), 2625 VD->getCanonicalDecl()->getBeginLoc(), 2626 DeviceID, FileID, Line); 2627 OS << llvm::format("_%x", FileID); 2628 } 2629 OS << "_decl_tgt_ref_ptr"; 2630 } 2631 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2632 if (!Ptr) { 2633 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2634 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2635 PtrName); 2636 2637 auto *GV = cast<llvm::GlobalVariable>(Ptr); 
2638 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2639 2640 if (!CGM.getLangOpts().OpenMPIsDevice) 2641 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2642 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2643 } 2644 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2645 } 2646 return Address::invalid(); 2647 } 2648 2649 llvm::Constant * 2650 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2651 assert(!CGM.getLangOpts().OpenMPUseTLS || 2652 !CGM.getContext().getTargetInfo().isTLSSupported()); 2653 // Lookup the entry, lazily creating it if necessary. 2654 std::string Suffix = getName({"cache", ""}); 2655 return getOrCreateInternalVariable( 2656 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2657 } 2658 2659 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2660 const VarDecl *VD, 2661 Address VDAddr, 2662 SourceLocation Loc) { 2663 if (CGM.getLangOpts().OpenMPUseTLS && 2664 CGM.getContext().getTargetInfo().isTLSSupported()) 2665 return VDAddr; 2666 2667 llvm::Type *VarTy = VDAddr.getElementType(); 2668 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2669 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2670 CGM.Int8PtrTy), 2671 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2672 getOrCreateThreadPrivateCache(VD)}; 2673 return Address(CGF.EmitRuntimeCall( 2674 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2675 VDAddr.getAlignment()); 2676 } 2677 2678 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2679 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2680 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2681 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2682 // library. 
2683 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2684 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2685 OMPLoc); 2686 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2687 // to register constructor/destructor for variable. 2688 llvm::Value *Args[] = { 2689 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2690 Ctor, CopyCtor, Dtor}; 2691 CGF.EmitRuntimeCall( 2692 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2693 } 2694 2695 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2696 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2697 bool PerformInit, CodeGenFunction *CGF) { 2698 if (CGM.getLangOpts().OpenMPUseTLS && 2699 CGM.getContext().getTargetInfo().isTLSSupported()) 2700 return nullptr; 2701 2702 VD = VD->getDefinition(CGM.getContext()); 2703 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2704 QualType ASTTy = VD->getType(); 2705 2706 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2707 const Expr *Init = VD->getAnyInitializer(); 2708 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2709 // Generate function that re-emits the declaration's initializer into the 2710 // threadprivate copy of the variable VD 2711 CodeGenFunction CtorCGF(CGM); 2712 FunctionArgList Args; 2713 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2714 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2715 ImplicitParamDecl::Other); 2716 Args.push_back(&Dst); 2717 2718 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2719 CGM.getContext().VoidPtrTy, Args); 2720 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2721 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2722 llvm::Function *Fn = 2723 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2724 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2725 Args, Loc, Loc); 2726 
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2727 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2728 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2729 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2730 Arg = CtorCGF.Builder.CreateElementBitCast( 2731 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2732 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2733 /*IsInitializer=*/true); 2734 ArgVal = CtorCGF.EmitLoadOfScalar( 2735 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2736 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2737 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2738 CtorCGF.FinishFunction(); 2739 Ctor = Fn; 2740 } 2741 if (VD->getType().isDestructedType() != QualType::DK_none) { 2742 // Generate function that emits destructor call for the threadprivate copy 2743 // of the variable VD 2744 CodeGenFunction DtorCGF(CGM); 2745 FunctionArgList Args; 2746 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2747 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2748 ImplicitParamDecl::Other); 2749 Args.push_back(&Dst); 2750 2751 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2752 CGM.getContext().VoidTy, Args); 2753 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2754 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2755 llvm::Function *Fn = 2756 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2757 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2758 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2759 Loc, Loc); 2760 // Create a scope with an artificial location for the body of this function. 
2761 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2762 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2763 DtorCGF.GetAddrOfLocalVar(&Dst), 2764 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2765 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2766 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2767 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2768 DtorCGF.FinishFunction(); 2769 Dtor = Fn; 2770 } 2771 // Do not emit init function if it is not required. 2772 if (!Ctor && !Dtor) 2773 return nullptr; 2774 2775 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2776 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2777 /*isVarArg=*/false) 2778 ->getPointerTo(); 2779 // Copying constructor for the threadprivate variable. 2780 // Must be NULL - reserved by runtime, but currently it requires that this 2781 // parameter is always NULL. Otherwise it fires assertion. 2782 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2783 if (Ctor == nullptr) { 2784 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2785 /*isVarArg=*/false) 2786 ->getPointerTo(); 2787 Ctor = llvm::Constant::getNullValue(CtorTy); 2788 } 2789 if (Dtor == nullptr) { 2790 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2791 /*isVarArg=*/false) 2792 ->getPointerTo(); 2793 Dtor = llvm::Constant::getNullValue(DtorTy); 2794 } 2795 if (!CGF) { 2796 auto *InitFunctionTy = 2797 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2798 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2799 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2800 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2801 CodeGenFunction InitCGF(CGM); 2802 FunctionArgList ArgList; 2803 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2804 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2805 Loc, Loc); 2806 
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2807 InitCGF.FinishFunction(); 2808 return InitFunction; 2809 } 2810 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2811 } 2812 return nullptr; 2813 } 2814 2815 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2816 llvm::GlobalVariable *Addr, 2817 bool PerformInit) { 2818 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2819 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2820 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2821 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2822 HasRequiresUnifiedSharedMemory)) 2823 return CGM.getLangOpts().OpenMPIsDevice; 2824 VD = VD->getDefinition(CGM.getContext()); 2825 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2826 return CGM.getLangOpts().OpenMPIsDevice; 2827 2828 QualType ASTTy = VD->getType(); 2829 2830 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2831 // Produce the unique prefix to identify the new target regions. We use 2832 // the source location of the variable declaration which we know to not 2833 // conflict with any target region. 
2834 unsigned DeviceID; 2835 unsigned FileID; 2836 unsigned Line; 2837 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2838 SmallString<128> Buffer, Out; 2839 { 2840 llvm::raw_svector_ostream OS(Buffer); 2841 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2842 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2843 } 2844 2845 const Expr *Init = VD->getAnyInitializer(); 2846 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2847 llvm::Constant *Ctor; 2848 llvm::Constant *ID; 2849 if (CGM.getLangOpts().OpenMPIsDevice) { 2850 // Generate function that re-emits the declaration's initializer into 2851 // the threadprivate copy of the variable VD 2852 CodeGenFunction CtorCGF(CGM); 2853 2854 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2855 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2856 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2857 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2858 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2859 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2860 FunctionArgList(), Loc, Loc); 2861 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2862 CtorCGF.EmitAnyExprToMem(Init, 2863 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2864 Init->getType().getQualifiers(), 2865 /*IsInitializer=*/true); 2866 CtorCGF.FinishFunction(); 2867 Ctor = Fn; 2868 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2869 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2870 } else { 2871 Ctor = new llvm::GlobalVariable( 2872 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2873 llvm::GlobalValue::PrivateLinkage, 2874 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2875 ID = Ctor; 2876 } 2877 2878 // Register the information for the entry associated with the constructor. 
2879 Out.clear(); 2880 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2881 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2882 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2883 } 2884 if (VD->getType().isDestructedType() != QualType::DK_none) { 2885 llvm::Constant *Dtor; 2886 llvm::Constant *ID; 2887 if (CGM.getLangOpts().OpenMPIsDevice) { 2888 // Generate function that emits destructor call for the threadprivate 2889 // copy of the variable VD 2890 CodeGenFunction DtorCGF(CGM); 2891 2892 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2893 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2894 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2895 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2896 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2897 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2898 FunctionArgList(), Loc, Loc); 2899 // Create a scope with an artificial location for the body of this 2900 // function. 2901 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2902 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2903 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2904 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2905 DtorCGF.FinishFunction(); 2906 Dtor = Fn; 2907 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2908 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2909 } else { 2910 Dtor = new llvm::GlobalVariable( 2911 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2912 llvm::GlobalValue::PrivateLinkage, 2913 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2914 ID = Dtor; 2915 } 2916 // Register the information for the entry associated with the destructor. 
2917 Out.clear(); 2918 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2919 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2920 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2921 } 2922 return CGM.getLangOpts().OpenMPIsDevice; 2923 } 2924 2925 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2926 QualType VarType, 2927 StringRef Name) { 2928 std::string Suffix = getName({"artificial", ""}); 2929 std::string CacheSuffix = getName({"cache", ""}); 2930 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2931 llvm::Value *GAddr = 2932 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2933 llvm::Value *Args[] = { 2934 emitUpdateLocation(CGF, SourceLocation()), 2935 getThreadID(CGF, SourceLocation()), 2936 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2937 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 2938 /*isSigned=*/false), 2939 getOrCreateInternalVariable( 2940 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 2941 return Address( 2942 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 2943 CGF.EmitRuntimeCall( 2944 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2945 VarLVType->getPointerTo(/*AddrSpace=*/0)), 2946 CGM.getPointerAlign()); 2947 } 2948 2949 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 2950 const RegionCodeGenTy &ThenGen, 2951 const RegionCodeGenTy &ElseGen) { 2952 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 2953 2954 // If the condition constant folds and can be elided, try to avoid emitting 2955 // the condition and the dead arm of the if/else. 2956 bool CondConstant; 2957 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 2958 if (CondConstant) 2959 ThenGen(CGF); 2960 else 2961 ElseGen(CGF); 2962 return; 2963 } 2964 2965 // Otherwise, the condition did not fold, or we couldn't elide it. 
Just 2966 // emit the conditional branch. 2967 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 2968 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 2969 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 2970 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 2971 2972 // Emit the 'then' code. 2973 CGF.EmitBlock(ThenBlock); 2974 ThenGen(CGF); 2975 CGF.EmitBranch(ContBlock); 2976 // Emit the 'else' code if present. 2977 // There is no need to emit line number for unconditional branch. 2978 (void)ApplyDebugLocation::CreateEmpty(CGF); 2979 CGF.EmitBlock(ElseBlock); 2980 ElseGen(CGF); 2981 // There is no need to emit line number for unconditional branch. 2982 (void)ApplyDebugLocation::CreateEmpty(CGF); 2983 CGF.EmitBranch(ContBlock); 2984 // Emit the continuation block for code after the if. 2985 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 2986 } 2987 2988 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 2989 llvm::Function *OutlinedFn, 2990 ArrayRef<llvm::Value *> CapturedVars, 2991 const Expr *IfCond) { 2992 if (!CGF.HaveInsertPoint()) 2993 return; 2994 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 2995 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 2996 PrePostActionTy &) { 2997 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 2998 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 2999 llvm::Value *Args[] = { 3000 RTLoc, 3001 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3002 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3003 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3004 RealArgs.append(std::begin(Args), std::end(Args)); 3005 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3006 3007 llvm::FunctionCallee RTLFn = 3008 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3009 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3010 }; 3011 auto &&ElseGen = [OutlinedFn, 
CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3012 PrePostActionTy &) { 3013 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3014 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3015 // Build calls: 3016 // __kmpc_serialized_parallel(&Loc, GTid); 3017 llvm::Value *Args[] = {RTLoc, ThreadID}; 3018 CGF.EmitRuntimeCall( 3019 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3020 3021 // OutlinedFn(>id, &zero, CapturedStruct); 3022 Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3023 /*Name*/ ".zero.addr"); 3024 CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); 3025 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3026 // ThreadId for serialized parallels is 0. 3027 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 3028 OutlinedFnArgs.push_back(ZeroAddr.getPointer()); 3029 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3030 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3031 3032 // __kmpc_end_serialized_parallel(&Loc, GTid); 3033 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3034 CGF.EmitRuntimeCall( 3035 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3036 EndArgs); 3037 }; 3038 if (IfCond) { 3039 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 3040 } else { 3041 RegionCodeGenTy ThenRCG(ThenGen); 3042 ThenRCG(CGF); 3043 } 3044 } 3045 3046 // If we're inside an (outlined) parallel region, use the region info's 3047 // thread-ID variable (it is passed in a first argument of the outlined function 3048 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3049 // regular serial code region, get thread ID by calling kmp_int32 3050 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3051 // return the address of that temp. 
3052 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3053 SourceLocation Loc) { 3054 if (auto *OMPRegionInfo = 3055 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3056 if (OMPRegionInfo->getThreadIDVariable()) 3057 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 3058 3059 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3060 QualType Int32Ty = 3061 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3062 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3063 CGF.EmitStoreOfScalar(ThreadID, 3064 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3065 3066 return ThreadIDTemp; 3067 } 3068 3069 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3070 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3071 SmallString<256> Buffer; 3072 llvm::raw_svector_ostream Out(Buffer); 3073 Out << Name; 3074 StringRef RuntimeName = Out.str(); 3075 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3076 if (Elem.second) { 3077 assert(Elem.second->getType()->getPointerElementType() == Ty && 3078 "OMP internal variable has different type than requested"); 3079 return &*Elem.second; 3080 } 3081 3082 return Elem.second = new llvm::GlobalVariable( 3083 CGM.getModule(), Ty, /*IsConstant*/ false, 3084 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3085 Elem.first(), /*InsertBefore=*/nullptr, 3086 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3087 } 3088 3089 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3090 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3091 std::string Name = getName({Prefix, "var"}); 3092 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3093 } 3094 3095 namespace { 3096 /// Common pre(post)-action for different OpenMP constructs. 
3097 class CommonActionTy final : public PrePostActionTy { 3098 llvm::FunctionCallee EnterCallee; 3099 ArrayRef<llvm::Value *> EnterArgs; 3100 llvm::FunctionCallee ExitCallee; 3101 ArrayRef<llvm::Value *> ExitArgs; 3102 bool Conditional; 3103 llvm::BasicBlock *ContBlock = nullptr; 3104 3105 public: 3106 CommonActionTy(llvm::FunctionCallee EnterCallee, 3107 ArrayRef<llvm::Value *> EnterArgs, 3108 llvm::FunctionCallee ExitCallee, 3109 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3110 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3111 ExitArgs(ExitArgs), Conditional(Conditional) {} 3112 void Enter(CodeGenFunction &CGF) override { 3113 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3114 if (Conditional) { 3115 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3116 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3117 ContBlock = CGF.createBasicBlock("omp_if.end"); 3118 // Generate the branch (If-stmt) 3119 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3120 CGF.EmitBlock(ThenBlock); 3121 } 3122 } 3123 void Done(CodeGenFunction &CGF) { 3124 // Emit the rest of blocks/branches 3125 CGF.EmitBranch(ContBlock); 3126 CGF.EmitBlock(ContBlock, true); 3127 } 3128 void Exit(CodeGenFunction &CGF) override { 3129 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3130 } 3131 }; 3132 } // anonymous namespace 3133 3134 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3135 StringRef CriticalName, 3136 const RegionCodeGenTy &CriticalOpGen, 3137 SourceLocation Loc, const Expr *Hint) { 3138 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3139 // CriticalOpGen(); 3140 // __kmpc_end_critical(ident_t *, gtid, Lock); 3141 // Prepare arguments and build a call to __kmpc_critical 3142 if (!CGF.HaveInsertPoint()) 3143 return; 3144 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3145 getCriticalRegionLock(CriticalName)}; 3146 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3147 std::end(Args)); 3148 if (Hint) { 3149 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3150 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3151 } 3152 CommonActionTy Action( 3153 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3154 : OMPRTL__kmpc_critical), 3155 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3156 CriticalOpGen.setAction(Action); 3157 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3158 } 3159 3160 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3161 const RegionCodeGenTy &MasterOpGen, 3162 SourceLocation Loc) { 3163 if (!CGF.HaveInsertPoint()) 3164 return; 3165 // if(__kmpc_master(ident_t *, gtid)) { 3166 // MasterOpGen(); 3167 // __kmpc_end_master(ident_t *, gtid); 3168 // } 3169 // Prepare arguments and build a call to __kmpc_master 3170 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3171 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3172 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3173 /*Conditional=*/true); 3174 MasterOpGen.setAction(Action); 3175 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3176 Action.Done(CGF); 3177 } 3178 3179 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3180 SourceLocation Loc) { 3181 if (!CGF.HaveInsertPoint()) 3182 return; 3183 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3184 llvm::Value *Args[] = { 3185 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3186 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3187 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 3188 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3189 Region->emitUntiedSwitch(CGF); 3190 } 3191 3192 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3193 const RegionCodeGenTy &TaskgroupOpGen, 3194 SourceLocation Loc) { 3195 if 
(!CGF.HaveInsertPoint()) 3196 return; 3197 // __kmpc_taskgroup(ident_t *, gtid); 3198 // TaskgroupOpGen(); 3199 // __kmpc_end_taskgroup(ident_t *, gtid); 3200 // Prepare arguments and build a call to __kmpc_taskgroup 3201 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3202 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3203 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3204 Args); 3205 TaskgroupOpGen.setAction(Action); 3206 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3207 } 3208 3209 /// Given an array of pointers to variables, project the address of a 3210 /// given variable. 3211 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3212 unsigned Index, const VarDecl *Var) { 3213 // Pull out the pointer to the variable. 3214 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3215 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3216 3217 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3218 Addr = CGF.Builder.CreateElementBitCast( 3219 Addr, CGF.ConvertTypeForMem(Var->getType())); 3220 return Addr; 3221 } 3222 3223 static llvm::Value *emitCopyprivateCopyFunction( 3224 CodeGenModule &CGM, llvm::Type *ArgsType, 3225 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3226 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3227 SourceLocation Loc) { 3228 ASTContext &C = CGM.getContext(); 3229 // void copy_func(void *LHSArg, void *RHSArg); 3230 FunctionArgList Args; 3231 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3232 ImplicitParamDecl::Other); 3233 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3234 ImplicitParamDecl::Other); 3235 Args.push_back(&LHSArg); 3236 Args.push_back(&RHSArg); 3237 const auto &CGFI = 3238 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3239 std::string Name = 3240 
CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3241 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3242 llvm::GlobalValue::InternalLinkage, Name, 3243 &CGM.getModule()); 3244 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3245 Fn->setDoesNotRecurse(); 3246 CodeGenFunction CGF(CGM); 3247 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3248 // Dest = (void*[n])(LHSArg); 3249 // Src = (void*[n])(RHSArg); 3250 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3251 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3252 ArgsType), CGF.getPointerAlign()); 3253 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3254 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3255 ArgsType), CGF.getPointerAlign()); 3256 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3257 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3258 // ... 3259 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3260 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3261 const auto *DestVar = 3262 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3263 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3264 3265 const auto *SrcVar = 3266 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3267 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3268 3269 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3270 QualType Type = VD->getType(); 3271 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3272 } 3273 CGF.FinishFunction(); 3274 return Fn; 3275 } 3276 3277 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3278 const RegionCodeGenTy &SingleOpGen, 3279 SourceLocation Loc, 3280 ArrayRef<const Expr *> CopyprivateVars, 3281 ArrayRef<const Expr *> SrcExprs, 3282 ArrayRef<const Expr *> DstExprs, 3283 ArrayRef<const Expr *> AssignmentOps) { 3284 if (!CGF.HaveInsertPoint()) 3285 return; 3286 assert(CopyprivateVars.size() == 
SrcExprs.size() && 3287 CopyprivateVars.size() == DstExprs.size() && 3288 CopyprivateVars.size() == AssignmentOps.size()); 3289 ASTContext &C = CGM.getContext(); 3290 // int32 did_it = 0; 3291 // if(__kmpc_single(ident_t *, gtid)) { 3292 // SingleOpGen(); 3293 // __kmpc_end_single(ident_t *, gtid); 3294 // did_it = 1; 3295 // } 3296 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3297 // <copy_func>, did_it); 3298 3299 Address DidIt = Address::invalid(); 3300 if (!CopyprivateVars.empty()) { 3301 // int32 did_it = 0; 3302 QualType KmpInt32Ty = 3303 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3304 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3305 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3306 } 3307 // Prepare arguments and build a call to __kmpc_single 3308 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3309 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3310 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3311 /*Conditional=*/true); 3312 SingleOpGen.setAction(Action); 3313 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3314 if (DidIt.isValid()) { 3315 // did_it = 1; 3316 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3317 } 3318 Action.Done(CGF); 3319 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3320 // <copy_func>, did_it); 3321 if (DidIt.isValid()) { 3322 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3323 QualType CopyprivateArrayTy = 3324 C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, 3325 /*IndexTypeQuals=*/0); 3326 // Create a list of all private variables for copyprivate. 
3327 Address CopyprivateList = 3328 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3329 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3330 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3331 CGF.Builder.CreateStore( 3332 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3333 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 3334 Elem); 3335 } 3336 // Build function that copies private values from single region to all other 3337 // threads in the corresponding parallel region. 3338 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3339 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3340 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3341 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3342 Address CL = 3343 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3344 CGF.VoidPtrTy); 3345 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3346 llvm::Value *Args[] = { 3347 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3348 getThreadID(CGF, Loc), // i32 <gtid> 3349 BufSize, // size_t <buf_size> 3350 CL.getPointer(), // void *<copyprivate list> 3351 CpyFn, // void (*) (void *, void *) <copy_func> 3352 DidItVal // i32 did_it 3353 }; 3354 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3355 } 3356 } 3357 3358 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3359 const RegionCodeGenTy &OrderedOpGen, 3360 SourceLocation Loc, bool IsThreads) { 3361 if (!CGF.HaveInsertPoint()) 3362 return; 3363 // __kmpc_ordered(ident_t *, gtid); 3364 // OrderedOpGen(); 3365 // __kmpc_end_ordered(ident_t *, gtid); 3366 // Prepare arguments and build a call to __kmpc_ordered 3367 if (IsThreads) { 3368 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3369 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3370 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3371 Args); 3372 
OrderedOpGen.setAction(Action); 3373 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3374 return; 3375 } 3376 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3377 } 3378 3379 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3380 unsigned Flags; 3381 if (Kind == OMPD_for) 3382 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3383 else if (Kind == OMPD_sections) 3384 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3385 else if (Kind == OMPD_single) 3386 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3387 else if (Kind == OMPD_barrier) 3388 Flags = OMP_IDENT_BARRIER_EXPL; 3389 else 3390 Flags = OMP_IDENT_BARRIER_IMPL; 3391 return Flags; 3392 } 3393 3394 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3395 CodeGenFunction &CGF, const OMPLoopDirective &S, 3396 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3397 // Check if the loop directive is actually a doacross loop directive. In this 3398 // case choose static, 1 schedule. 3399 if (llvm::any_of( 3400 S.getClausesOfKind<OMPOrderedClause>(), 3401 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3402 ScheduleKind = OMPC_SCHEDULE_static; 3403 // Chunk size is 1 in this case. 
3404 llvm::APInt ChunkSize(32, 1); 3405 ChunkExpr = IntegerLiteral::Create( 3406 CGF.getContext(), ChunkSize, 3407 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3408 SourceLocation()); 3409 } 3410 } 3411 3412 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3413 OpenMPDirectiveKind Kind, bool EmitChecks, 3414 bool ForceSimpleCall) { 3415 if (!CGF.HaveInsertPoint()) 3416 return; 3417 // Build call __kmpc_cancel_barrier(loc, thread_id); 3418 // Build call __kmpc_barrier(loc, thread_id); 3419 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3420 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3421 // thread_id); 3422 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3423 getThreadID(CGF, Loc)}; 3424 if (auto *OMPRegionInfo = 3425 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3426 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3427 llvm::Value *Result = CGF.EmitRuntimeCall( 3428 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3429 if (EmitChecks) { 3430 // if (__kmpc_cancel_barrier()) { 3431 // exit from construct; 3432 // } 3433 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3434 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3435 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3436 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3437 CGF.EmitBlock(ExitBB); 3438 // exit from construct; 3439 CodeGenFunction::JumpDest CancelDestination = 3440 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3441 CGF.EmitBranchThroughCleanup(CancelDestination); 3442 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3443 } 3444 return; 3445 } 3446 } 3447 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3448 } 3449 3450 /// Map the OpenMP loop schedule to the runtime enumeration. 
3451 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, 3452 bool Chunked, bool Ordered) { 3453 switch (ScheduleKind) { 3454 case OMPC_SCHEDULE_static: 3455 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) 3456 : (Ordered ? OMP_ord_static : OMP_sch_static); 3457 case OMPC_SCHEDULE_dynamic: 3458 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; 3459 case OMPC_SCHEDULE_guided: 3460 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; 3461 case OMPC_SCHEDULE_runtime: 3462 return Ordered ? OMP_ord_runtime : OMP_sch_runtime; 3463 case OMPC_SCHEDULE_auto: 3464 return Ordered ? OMP_ord_auto : OMP_sch_auto; 3465 case OMPC_SCHEDULE_unknown: 3466 assert(!Chunked && "chunk was specified but schedule kind not known"); 3467 return Ordered ? OMP_ord_static : OMP_sch_static; 3468 } 3469 llvm_unreachable("Unexpected runtime schedule"); 3470 } 3471 3472 /// Map the OpenMP distribute schedule to the runtime enumeration. 3473 static OpenMPSchedType 3474 getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { 3475 // only static is allowed for dist_schedule 3476 return Chunked ? 
OMP_dist_sch_static_chunked : OMP_dist_sch_static; 3477 } 3478 3479 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, 3480 bool Chunked) const { 3481 OpenMPSchedType Schedule = 3482 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3483 return Schedule == OMP_sch_static; 3484 } 3485 3486 bool CGOpenMPRuntime::isStaticNonchunked( 3487 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3488 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3489 return Schedule == OMP_dist_sch_static; 3490 } 3491 3492 bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, 3493 bool Chunked) const { 3494 OpenMPSchedType Schedule = 3495 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); 3496 return Schedule == OMP_sch_static_chunked; 3497 } 3498 3499 bool CGOpenMPRuntime::isStaticChunked( 3500 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { 3501 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); 3502 return Schedule == OMP_dist_sch_static_chunked; 3503 } 3504 3505 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { 3506 OpenMPSchedType Schedule = 3507 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); 3508 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here"); 3509 return Schedule != OMP_sch_static; 3510 } 3511 3512 static int addMonoNonMonoModifier(OpenMPSchedType Schedule, 3513 OpenMPScheduleClauseModifier M1, 3514 OpenMPScheduleClauseModifier M2) { 3515 int Modifier = 0; 3516 switch (M1) { 3517 case OMPC_SCHEDULE_MODIFIER_monotonic: 3518 Modifier = OMP_sch_modifier_monotonic; 3519 break; 3520 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3521 Modifier = OMP_sch_modifier_nonmonotonic; 3522 break; 3523 case OMPC_SCHEDULE_MODIFIER_simd: 3524 if (Schedule == OMP_sch_static_chunked) 3525 Schedule = OMP_sch_static_balanced_chunked; 3526 break; 3527 case OMPC_SCHEDULE_MODIFIER_last: 3528 
case OMPC_SCHEDULE_MODIFIER_unknown: 3529 break; 3530 } 3531 switch (M2) { 3532 case OMPC_SCHEDULE_MODIFIER_monotonic: 3533 Modifier = OMP_sch_modifier_monotonic; 3534 break; 3535 case OMPC_SCHEDULE_MODIFIER_nonmonotonic: 3536 Modifier = OMP_sch_modifier_nonmonotonic; 3537 break; 3538 case OMPC_SCHEDULE_MODIFIER_simd: 3539 if (Schedule == OMP_sch_static_chunked) 3540 Schedule = OMP_sch_static_balanced_chunked; 3541 break; 3542 case OMPC_SCHEDULE_MODIFIER_last: 3543 case OMPC_SCHEDULE_MODIFIER_unknown: 3544 break; 3545 } 3546 return Schedule | Modifier; 3547 } 3548 3549 void CGOpenMPRuntime::emitForDispatchInit( 3550 CodeGenFunction &CGF, SourceLocation Loc, 3551 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 3552 bool Ordered, const DispatchRTInput &DispatchValues) { 3553 if (!CGF.HaveInsertPoint()) 3554 return; 3555 OpenMPSchedType Schedule = getRuntimeSchedule( 3556 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); 3557 assert(Ordered || 3558 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && 3559 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && 3560 Schedule != OMP_sch_static_balanced_chunked)); 3561 // Call __kmpc_dispatch_init( 3562 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, 3563 // kmp_int[32|64] lower, kmp_int[32|64] upper, 3564 // kmp_int[32|64] stride, kmp_int[32|64] chunk); 3565 3566 // If the Chunk was not specified in the clause - use default value 1. 3567 llvm::Value *Chunk = DispatchValues.Chunk ? 
DispatchValues.Chunk 3568 : CGF.Builder.getIntN(IVSize, 1); 3569 llvm::Value *Args[] = { 3570 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3571 CGF.Builder.getInt32(addMonoNonMonoModifier( 3572 Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type 3573 DispatchValues.LB, // Lower 3574 DispatchValues.UB, // Upper 3575 CGF.Builder.getIntN(IVSize, 1), // Stride 3576 Chunk // Chunk 3577 }; 3578 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); 3579 } 3580 3581 static void emitForStaticInitCall( 3582 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, 3583 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, 3584 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, 3585 const CGOpenMPRuntime::StaticRTInput &Values) { 3586 if (!CGF.HaveInsertPoint()) 3587 return; 3588 3589 assert(!Values.Ordered); 3590 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || 3591 Schedule == OMP_sch_static_balanced_chunked || 3592 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || 3593 Schedule == OMP_dist_sch_static || 3594 Schedule == OMP_dist_sch_static_chunked); 3595 3596 // Call __kmpc_for_static_init( 3597 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, 3598 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, 3599 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, 3600 // kmp_int[32|64] incr, kmp_int[32|64] chunk); 3601 llvm::Value *Chunk = Values.Chunk; 3602 if (Chunk == nullptr) { 3603 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static || 3604 Schedule == OMP_dist_sch_static) && 3605 "expected static non-chunked schedule"); 3606 // If the Chunk was not specified in the clause - use default value 1. 
3607 Chunk = CGF.Builder.getIntN(Values.IVSize, 1); 3608 } else { 3609 assert((Schedule == OMP_sch_static_chunked || 3610 Schedule == OMP_sch_static_balanced_chunked || 3611 Schedule == OMP_ord_static_chunked || 3612 Schedule == OMP_dist_sch_static_chunked) && 3613 "expected static chunked schedule"); 3614 } 3615 llvm::Value *Args[] = { 3616 UpdateLocation, 3617 ThreadId, 3618 CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1, 3619 M2)), // Schedule type 3620 Values.IL.getPointer(), // &isLastIter 3621 Values.LB.getPointer(), // &LB 3622 Values.UB.getPointer(), // &UB 3623 Values.ST.getPointer(), // &Stride 3624 CGF.Builder.getIntN(Values.IVSize, 1), // Incr 3625 Chunk // Chunk 3626 }; 3627 CGF.EmitRuntimeCall(ForStaticInitFunction, Args); 3628 } 3629 3630 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, 3631 SourceLocation Loc, 3632 OpenMPDirectiveKind DKind, 3633 const OpenMPScheduleTy &ScheduleKind, 3634 const StaticRTInput &Values) { 3635 OpenMPSchedType ScheduleNum = getRuntimeSchedule( 3636 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); 3637 assert(isOpenMPWorksharingDirective(DKind) && 3638 "Expected loop-based or sections-based directive."); 3639 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, 3640 isOpenMPLoopDirective(DKind) 3641 ? 
OMP_IDENT_WORK_LOOP 3642 : OMP_IDENT_WORK_SECTIONS); 3643 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3644 llvm::FunctionCallee StaticInitFunction = 3645 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3646 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3647 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); 3648 } 3649 3650 void CGOpenMPRuntime::emitDistributeStaticInit( 3651 CodeGenFunction &CGF, SourceLocation Loc, 3652 OpenMPDistScheduleClauseKind SchedKind, 3653 const CGOpenMPRuntime::StaticRTInput &Values) { 3654 OpenMPSchedType ScheduleNum = 3655 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); 3656 llvm::Value *UpdatedLocation = 3657 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); 3658 llvm::Value *ThreadId = getThreadID(CGF, Loc); 3659 llvm::FunctionCallee StaticInitFunction = 3660 createForStaticInitFunction(Values.IVSize, Values.IVSigned); 3661 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, 3662 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, 3663 OMPC_SCHEDULE_MODIFIER_unknown, Values); 3664 } 3665 3666 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, 3667 SourceLocation Loc, 3668 OpenMPDirectiveKind DKind) { 3669 if (!CGF.HaveInsertPoint()) 3670 return; 3671 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); 3672 llvm::Value *Args[] = { 3673 emitUpdateLocation(CGF, Loc, 3674 isOpenMPDistributeDirective(DKind) 3675 ? OMP_IDENT_WORK_DISTRIBUTE 3676 : isOpenMPLoopDirective(DKind) 3677 ? 
OMP_IDENT_WORK_LOOP 3678 : OMP_IDENT_WORK_SECTIONS), 3679 getThreadID(CGF, Loc)}; 3680 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini), 3681 Args); 3682 } 3683 3684 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 3685 SourceLocation Loc, 3686 unsigned IVSize, 3687 bool IVSigned) { 3688 if (!CGF.HaveInsertPoint()) 3689 return; 3690 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); 3691 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3692 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); 3693 } 3694 3695 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, 3696 SourceLocation Loc, unsigned IVSize, 3697 bool IVSigned, Address IL, 3698 Address LB, Address UB, 3699 Address ST) { 3700 // Call __kmpc_dispatch_next( 3701 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, 3702 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, 3703 // kmp_int[32|64] *p_stride); 3704 llvm::Value *Args[] = { 3705 emitUpdateLocation(CGF, Loc), 3706 getThreadID(CGF, Loc), 3707 IL.getPointer(), // &isLastIter 3708 LB.getPointer(), // &Lower 3709 UB.getPointer(), // &Upper 3710 ST.getPointer() // &Stride 3711 }; 3712 llvm::Value *Call = 3713 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); 3714 return CGF.EmitScalarConversion( 3715 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), 3716 CGF.getContext().BoolTy, Loc); 3717 } 3718 3719 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 3720 llvm::Value *NumThreads, 3721 SourceLocation Loc) { 3722 if (!CGF.HaveInsertPoint()) 3723 return; 3724 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) 3725 llvm::Value *Args[] = { 3726 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3727 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; 3728 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads), 3729 Args); 3730 
} 3731 3732 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, 3733 OpenMPProcBindClauseKind ProcBind, 3734 SourceLocation Loc) { 3735 if (!CGF.HaveInsertPoint()) 3736 return; 3737 // Constants for proc bind value accepted by the runtime. 3738 enum ProcBindTy { 3739 ProcBindFalse = 0, 3740 ProcBindTrue, 3741 ProcBindMaster, 3742 ProcBindClose, 3743 ProcBindSpread, 3744 ProcBindIntel, 3745 ProcBindDefault 3746 } RuntimeProcBind; 3747 switch (ProcBind) { 3748 case OMPC_PROC_BIND_master: 3749 RuntimeProcBind = ProcBindMaster; 3750 break; 3751 case OMPC_PROC_BIND_close: 3752 RuntimeProcBind = ProcBindClose; 3753 break; 3754 case OMPC_PROC_BIND_spread: 3755 RuntimeProcBind = ProcBindSpread; 3756 break; 3757 case OMPC_PROC_BIND_unknown: 3758 llvm_unreachable("Unsupported proc_bind value."); 3759 } 3760 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) 3761 llvm::Value *Args[] = { 3762 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3763 llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)}; 3764 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args); 3765 } 3766 3767 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, 3768 SourceLocation Loc) { 3769 if (!CGF.HaveInsertPoint()) 3770 return; 3771 // Build call void __kmpc_flush(ident_t *loc) 3772 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush), 3773 emitUpdateLocation(CGF, Loc)); 3774 } 3775 3776 namespace { 3777 /// Indexes of fields for type kmp_task_t. 3778 enum KmpTaskTFields { 3779 /// List of shared variables. 3780 KmpTaskTShareds, 3781 /// Task routine. 3782 KmpTaskTRoutine, 3783 /// Partition id for the untied tasks. 3784 KmpTaskTPartId, 3785 /// Function with call of destructors for private variables. 3786 Data1, 3787 /// Task priority. 3788 Data2, 3789 /// (Taskloops only) Lower bound. 3790 KmpTaskTLowerBound, 3791 /// (Taskloops only) Upper bound. 
3792 KmpTaskTUpperBound, 3793 /// (Taskloops only) Stride. 3794 KmpTaskTStride, 3795 /// (Taskloops only) Is last iteration flag. 3796 KmpTaskTLastIter, 3797 /// (Taskloops only) Reduction data. 3798 KmpTaskTReductions, 3799 }; 3800 } // anonymous namespace 3801 3802 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { 3803 return OffloadEntriesTargetRegion.empty() && 3804 OffloadEntriesDeviceGlobalVar.empty(); 3805 } 3806 3807 /// Initialize target region entry. 3808 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3809 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3810 StringRef ParentName, unsigned LineNum, 3811 unsigned Order) { 3812 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3813 "only required for the device " 3814 "code generation."); 3815 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = 3816 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, 3817 OMPTargetRegionEntryTargetRegion); 3818 ++OffloadingEntriesNum; 3819 } 3820 3821 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3822 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, 3823 StringRef ParentName, unsigned LineNum, 3824 llvm::Constant *Addr, llvm::Constant *ID, 3825 OMPTargetRegionEntryKind Flags) { 3826 // If we are emitting code for a target, the entry is already initialized, 3827 // only has to be registered. 
3828 if (CGM.getLangOpts().OpenMPIsDevice) { 3829 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) { 3830 unsigned DiagID = CGM.getDiags().getCustomDiagID( 3831 DiagnosticsEngine::Error, 3832 "Unable to find target region on line '%0' in the device code."); 3833 CGM.getDiags().Report(DiagID) << LineNum; 3834 return; 3835 } 3836 auto &Entry = 3837 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; 3838 assert(Entry.isValid() && "Entry not initialized!"); 3839 Entry.setAddress(Addr); 3840 Entry.setID(ID); 3841 Entry.setFlags(Flags); 3842 } else { 3843 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); 3844 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; 3845 ++OffloadingEntriesNum; 3846 } 3847 } 3848 3849 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( 3850 unsigned DeviceID, unsigned FileID, StringRef ParentName, 3851 unsigned LineNum) const { 3852 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); 3853 if (PerDevice == OffloadEntriesTargetRegion.end()) 3854 return false; 3855 auto PerFile = PerDevice->second.find(FileID); 3856 if (PerFile == PerDevice->second.end()) 3857 return false; 3858 auto PerParentName = PerFile->second.find(ParentName); 3859 if (PerParentName == PerFile->second.end()) 3860 return false; 3861 auto PerLine = PerParentName->second.find(LineNum); 3862 if (PerLine == PerParentName->second.end()) 3863 return false; 3864 // Fail if this entry is already registered. 3865 if (PerLine->second.getAddress() || PerLine->second.getID()) 3866 return false; 3867 return true; 3868 } 3869 3870 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( 3871 const OffloadTargetRegionEntryInfoActTy &Action) { 3872 // Scan all target region entries and perform the provided action. 
3873 for (const auto &D : OffloadEntriesTargetRegion) 3874 for (const auto &F : D.second) 3875 for (const auto &P : F.second) 3876 for (const auto &L : P.second) 3877 Action(D.first, F.first, P.first(), L.first, L.second); 3878 } 3879 3880 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3881 initializeDeviceGlobalVarEntryInfo(StringRef Name, 3882 OMPTargetGlobalVarEntryKind Flags, 3883 unsigned Order) { 3884 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " 3885 "only required for the device " 3886 "code generation."); 3887 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); 3888 ++OffloadingEntriesNum; 3889 } 3890 3891 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3892 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, 3893 CharUnits VarSize, 3894 OMPTargetGlobalVarEntryKind Flags, 3895 llvm::GlobalValue::LinkageTypes Linkage) { 3896 if (CGM.getLangOpts().OpenMPIsDevice) { 3897 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3898 assert(Entry.isValid() && Entry.getFlags() == Flags && 3899 "Entry not initialized!"); 3900 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3901 "Resetting with the new address."); 3902 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { 3903 if (Entry.getVarSize().isZero()) { 3904 Entry.setVarSize(VarSize); 3905 Entry.setLinkage(Linkage); 3906 } 3907 return; 3908 } 3909 Entry.setVarSize(VarSize); 3910 Entry.setLinkage(Linkage); 3911 Entry.setAddress(Addr); 3912 } else { 3913 if (hasDeviceGlobalVarEntryInfo(VarName)) { 3914 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; 3915 assert(Entry.isValid() && Entry.getFlags() == Flags && 3916 "Entry not initialized!"); 3917 assert((!Entry.getAddress() || Entry.getAddress() == Addr) && 3918 "Resetting with the new address."); 3919 if (Entry.getVarSize().isZero()) { 3920 Entry.setVarSize(VarSize); 3921 Entry.setLinkage(Linkage); 3922 } 3923 return; 3924 } 3925 
OffloadEntriesDeviceGlobalVar.try_emplace( 3926 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); 3927 ++OffloadingEntriesNum; 3928 } 3929 } 3930 3931 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: 3932 actOnDeviceGlobalVarEntriesInfo( 3933 const OffloadDeviceGlobalVarEntryInfoActTy &Action) { 3934 // Scan all target region entries and perform the provided action. 3935 for (const auto &E : OffloadEntriesDeviceGlobalVar) 3936 Action(E.getKey(), E.getValue()); 3937 } 3938 3939 llvm::Function * 3940 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { 3941 // If we don't have entries or if we are emitting code for the device, we 3942 // don't need to do anything. 3943 if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty()) 3944 return nullptr; 3945 3946 llvm::Module &M = CGM.getModule(); 3947 ASTContext &C = CGM.getContext(); 3948 3949 // Get list of devices we care about 3950 const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples; 3951 3952 // We should be creating an offloading descriptor only if there are devices 3953 // specified. 3954 assert(!Devices.empty() && "No OpenMP offloading devices??"); 3955 3956 // Create the external variables that will point to the begin and end of the 3957 // host entries section. These will be defined by the linker. 
3958 llvm::Type *OffloadEntryTy = 3959 CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()); 3960 std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"}); 3961 auto *HostEntriesBegin = new llvm::GlobalVariable( 3962 M, OffloadEntryTy, /*isConstant=*/true, 3963 llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr, 3964 EntriesBeginName); 3965 std::string EntriesEndName = getName({"omp_offloading", "entries_end"}); 3966 auto *HostEntriesEnd = 3967 new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true, 3968 llvm::GlobalValue::ExternalLinkage, 3969 /*Initializer=*/nullptr, EntriesEndName); 3970 3971 // Create all device images 3972 auto *DeviceImageTy = cast<llvm::StructType>( 3973 CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy())); 3974 ConstantInitBuilder DeviceImagesBuilder(CGM); 3975 ConstantArrayBuilder DeviceImagesEntries = 3976 DeviceImagesBuilder.beginArray(DeviceImageTy); 3977 3978 for (const llvm::Triple &Device : Devices) { 3979 StringRef T = Device.getTriple(); 3980 std::string BeginName = getName({"omp_offloading", "img_start", ""}); 3981 auto *ImgBegin = new llvm::GlobalVariable( 3982 M, CGM.Int8Ty, /*isConstant=*/true, 3983 llvm::GlobalValue::ExternalWeakLinkage, 3984 /*Initializer=*/nullptr, Twine(BeginName).concat(T)); 3985 std::string EndName = getName({"omp_offloading", "img_end", ""}); 3986 auto *ImgEnd = new llvm::GlobalVariable( 3987 M, CGM.Int8Ty, /*isConstant=*/true, 3988 llvm::GlobalValue::ExternalWeakLinkage, 3989 /*Initializer=*/nullptr, Twine(EndName).concat(T)); 3990 3991 llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin, 3992 HostEntriesEnd}; 3993 createConstantGlobalStructAndAddToParent(CGM, getTgtDeviceImageQTy(), Data, 3994 DeviceImagesEntries); 3995 } 3996 3997 // Create device images global array. 
3998 std::string ImagesName = getName({"omp_offloading", "device_images"}); 3999 llvm::GlobalVariable *DeviceImages = 4000 DeviceImagesEntries.finishAndCreateGlobal(ImagesName, 4001 CGM.getPointerAlign(), 4002 /*isConstant=*/true); 4003 DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4004 4005 // This is a Zero array to be used in the creation of the constant expressions 4006 llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty), 4007 llvm::Constant::getNullValue(CGM.Int32Ty)}; 4008 4009 // Create the target region descriptor. 4010 llvm::Constant *Data[] = { 4011 llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()), 4012 llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(), 4013 DeviceImages, Index), 4014 HostEntriesBegin, HostEntriesEnd}; 4015 std::string Descriptor = getName({"omp_offloading", "descriptor"}); 4016 llvm::GlobalVariable *Desc = createGlobalStruct( 4017 CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor); 4018 4019 // Emit code to register or unregister the descriptor at execution 4020 // startup or closing, respectively. 4021 4022 llvm::Function *UnRegFn; 4023 { 4024 FunctionArgList Args; 4025 ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other); 4026 Args.push_back(&DummyPtr); 4027 4028 CodeGenFunction CGF(CGM); 4029 // Disable debug info for global (de-)initializer because they are not part 4030 // of some particular construct. 
4031 CGF.disableDebugInfo(); 4032 const auto &FI = 4033 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4034 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 4035 std::string UnregName = getName({"omp_offloading", "descriptor_unreg"}); 4036 UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI); 4037 CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args); 4038 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_unregister_lib), 4039 Desc); 4040 CGF.FinishFunction(); 4041 } 4042 llvm::Function *RegFn; 4043 { 4044 CodeGenFunction CGF(CGM); 4045 // Disable debug info for global (de-)initializer because they are not part 4046 // of some particular construct. 4047 CGF.disableDebugInfo(); 4048 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 4049 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 4050 4051 // Encode offload target triples into the registration function name. It 4052 // will serve as a comdat key for the registration/unregistration code for 4053 // this particular combination of offloading targets. 4054 SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U); 4055 RegFnNameParts[0] = "omp_offloading"; 4056 RegFnNameParts[1] = "descriptor_reg"; 4057 llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2), 4058 [](const llvm::Triple &T) -> const std::string& { 4059 return T.getTriple(); 4060 }); 4061 llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end()); 4062 std::string Descriptor = getName(RegFnNameParts); 4063 RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI); 4064 CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList()); 4065 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); 4066 // Create a variable to drive the registration and unregistration of the 4067 // descriptor, so we can reuse the logic that emits Ctors and Dtors. 
4068 ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), 4069 SourceLocation(), nullptr, C.CharTy, 4070 ImplicitParamDecl::Other); 4071 CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); 4072 CGF.FinishFunction(); 4073 } 4074 if (CGM.supportsCOMDAT()) { 4075 // It is sufficient to call registration function only once, so create a 4076 // COMDAT group for registration/unregistration functions and associated 4077 // data. That would reduce startup time and code size. Registration 4078 // function serves as a COMDAT group key. 4079 llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName()); 4080 RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); 4081 RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); 4082 RegFn->setComdat(ComdatKey); 4083 UnRegFn->setComdat(ComdatKey); 4084 DeviceImages->setComdat(ComdatKey); 4085 Desc->setComdat(ComdatKey); 4086 } 4087 return RegFn; 4088 } 4089 4090 void CGOpenMPRuntime::createOffloadEntry( 4091 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, 4092 llvm::GlobalValue::LinkageTypes Linkage) { 4093 StringRef Name = Addr->getName(); 4094 llvm::Module &M = CGM.getModule(); 4095 llvm::LLVMContext &C = M.getContext(); 4096 4097 // Create constant string with the name. 
4098 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); 4099 4100 std::string StringName = getName({"omp_offloading", "entry_name"}); 4101 auto *Str = new llvm::GlobalVariable( 4102 M, StrPtrInit->getType(), /*isConstant=*/true, 4103 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); 4104 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 4105 4106 llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy), 4107 llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy), 4108 llvm::ConstantInt::get(CGM.SizeTy, Size), 4109 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 4110 llvm::ConstantInt::get(CGM.Int32Ty, 0)}; 4111 std::string EntryName = getName({"omp_offloading", "entry", ""}); 4112 llvm::GlobalVariable *Entry = createGlobalStruct( 4113 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, 4114 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); 4115 4116 // The entry has to be created in the section the linker expects it to be. 4117 std::string Section = getName({"omp_offloading", "entries"}); 4118 Entry->setSection(Section); 4119 } 4120 4121 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4122 // Emit the offloading entries and metadata so that the device codegen side 4123 // can easily figure out what to emit. The produced metadata looks like 4124 // this: 4125 // 4126 // !omp_offload.info = !{!1, ...} 4127 // 4128 // Right now we only generate metadata for function that contain target 4129 // regions. 4130 4131 // If we do not have entries, we don't need to do anything. 
4132 if (OffloadEntriesInfoManager.empty()) 4133 return; 4134 4135 llvm::Module &M = CGM.getModule(); 4136 llvm::LLVMContext &C = M.getContext(); 4137 SmallVector<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 16> 4138 OrderedEntries(OffloadEntriesInfoManager.size()); 4139 llvm::SmallVector<StringRef, 16> ParentFunctions( 4140 OffloadEntriesInfoManager.size()); 4141 4142 // Auxiliary methods to create metadata values and strings. 4143 auto &&GetMDInt = [this](unsigned V) { 4144 return llvm::ConstantAsMetadata::get( 4145 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4146 }; 4147 4148 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4149 4150 // Create the offloading info metadata node. 4151 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4152 4153 // Create function that emits metadata for each target region entry; 4154 auto &&TargetRegionMetadataEmitter = 4155 [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString]( 4156 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4157 unsigned Line, 4158 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4159 // Generate metadata for target regions. Each entry of this metadata 4160 // contains: 4161 // - Entry 0 -> Kind of this type of metadata (0). 4162 // - Entry 1 -> Device ID of the file where the entry was identified. 4163 // - Entry 2 -> File ID of the file where the entry was identified. 4164 // - Entry 3 -> Mangled name of the function where the entry was 4165 // identified. 4166 // - Entry 4 -> Line in the file where the entry was identified. 4167 // - Entry 5 -> Order the entry was created. 4168 // The first element of the metadata node is the kind. 4169 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4170 GetMDInt(FileID), GetMDString(ParentName), 4171 GetMDInt(Line), GetMDInt(E.getOrder())}; 4172 4173 // Save this entry in the right position of the ordered entries array. 
4174 OrderedEntries[E.getOrder()] = &E; 4175 ParentFunctions[E.getOrder()] = ParentName; 4176 4177 // Add metadata to the named metadata node. 4178 MD->addOperand(llvm::MDNode::get(C, Ops)); 4179 }; 4180 4181 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4182 TargetRegionMetadataEmitter); 4183 4184 // Create function that emits metadata for each device global variable entry; 4185 auto &&DeviceGlobalVarMetadataEmitter = 4186 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4187 MD](StringRef MangledName, 4188 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4189 &E) { 4190 // Generate metadata for global variables. Each entry of this metadata 4191 // contains: 4192 // - Entry 0 -> Kind of this type of metadata (1). 4193 // - Entry 1 -> Mangled name of the variable. 4194 // - Entry 2 -> Declare target kind. 4195 // - Entry 3 -> Order the entry was created. 4196 // The first element of the metadata node is the kind. 4197 llvm::Metadata *Ops[] = { 4198 GetMDInt(E.getKind()), GetMDString(MangledName), 4199 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4200 4201 // Save this entry in the right position of the ordered entries array. 4202 OrderedEntries[E.getOrder()] = &E; 4203 4204 // Add metadata to the named metadata node. 4205 MD->addOperand(llvm::MDNode::get(C, Ops)); 4206 }; 4207 4208 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4209 DeviceGlobalVarMetadataEmitter); 4210 4211 for (const auto *E : OrderedEntries) { 4212 assert(E && "All ordered entries must exist!"); 4213 if (const auto *CE = 4214 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4215 E)) { 4216 if (!CE->getID() || !CE->getAddress()) { 4217 // Do not blame the entry if the parent funtion is not emitted. 
4218 StringRef FnName = ParentFunctions[CE->getOrder()]; 4219 if (!CGM.GetGlobalValue(FnName)) 4220 continue; 4221 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4222 DiagnosticsEngine::Error, 4223 "Offloading entry for target region is incorrect: either the " 4224 "address or the ID is invalid."); 4225 CGM.getDiags().Report(DiagID); 4226 continue; 4227 } 4228 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4229 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4230 } else if (const auto *CE = 4231 dyn_cast<OffloadEntriesInfoManagerTy:: 4232 OffloadEntryInfoDeviceGlobalVar>(E)) { 4233 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4234 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4235 CE->getFlags()); 4236 switch (Flags) { 4237 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4238 if (CGM.getLangOpts().OpenMPIsDevice && 4239 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4240 continue; 4241 if (!CE->getAddress()) { 4242 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4243 DiagnosticsEngine::Error, 4244 "Offloading entry for declare target variable is incorrect: the " 4245 "address is invalid."); 4246 CGM.getDiags().Report(DiagID); 4247 continue; 4248 } 4249 // The vaiable has no definition - no need to add the entry. 
4250 if (CE->getVarSize().isZero()) 4251 continue; 4252 break; 4253 } 4254 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4255 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4256 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4257 "Declaret target link address is set."); 4258 if (CGM.getLangOpts().OpenMPIsDevice) 4259 continue; 4260 if (!CE->getAddress()) { 4261 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4262 DiagnosticsEngine::Error, 4263 "Offloading entry for declare target variable is incorrect: the " 4264 "address is invalid."); 4265 CGM.getDiags().Report(DiagID); 4266 continue; 4267 } 4268 break; 4269 } 4270 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4271 CE->getVarSize().getQuantity(), Flags, 4272 CE->getLinkage()); 4273 } else { 4274 llvm_unreachable("Unsupported entry kind."); 4275 } 4276 } 4277 } 4278 4279 /// Loads all the offload entries information from the host IR 4280 /// metadata. 4281 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4282 // If we are in target mode, load the metadata from the host IR. This code has 4283 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4284 4285 if (!CGM.getLangOpts().OpenMPIsDevice) 4286 return; 4287 4288 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4289 return; 4290 4291 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4292 if (auto EC = Buf.getError()) { 4293 CGM.getDiags().Report(diag::err_cannot_open_file) 4294 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4295 return; 4296 } 4297 4298 llvm::LLVMContext C; 4299 auto ME = expectedToErrorOrAndEmitErrors( 4300 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4301 4302 if (auto EC = ME.getError()) { 4303 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4304 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4305 CGM.getDiags().Report(DiagID) 4306 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4307 return; 4308 } 4309 4310 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4311 if (!MD) 4312 return; 4313 4314 for (llvm::MDNode *MN : MD->operands()) { 4315 auto &&GetMDInt = [MN](unsigned Idx) { 4316 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4317 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4318 }; 4319 4320 auto &&GetMDString = [MN](unsigned Idx) { 4321 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4322 return V->getString(); 4323 }; 4324 4325 switch (GetMDInt(0)) { 4326 default: 4327 llvm_unreachable("Unexpected metadata!"); 4328 break; 4329 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4330 OffloadingEntryInfoTargetRegion: 4331 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4332 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4333 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4334 /*Order=*/GetMDInt(5)); 4335 break; 4336 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4337 OffloadingEntryInfoDeviceGlobalVar: 4338 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4339 /*MangledName=*/GetMDString(1), 4340 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4341 /*Flags=*/GetMDInt(2)), 4342 /*Order=*/GetMDInt(3)); 4343 break; 4344 } 4345 } 4346 } 4347 4348 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4349 if (!KmpRoutineEntryPtrTy) { 4350 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4351 ASTContext &C = CGM.getContext(); 4352 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4353 FunctionProtoType::ExtProtoInfo EPI; 4354 KmpRoutineEntryPtrQTy = C.getPointerType( 4355 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4356 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4357 } 4358 } 4359 4360 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4361 // Make sure the type of the entry is already created. This is the type we 4362 // have to create: 4363 // struct __tgt_offload_entry{ 4364 // void *addr; // Pointer to the offload entry info. 4365 // // (function or global) 4366 // char *name; // Name of the function or global. 4367 // size_t size; // Size of the entry info (0 if it a function). 4368 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4369 // int32_t reserved; // Reserved, to use by the runtime library. 
4370 // }; 4371 if (TgtOffloadEntryQTy.isNull()) { 4372 ASTContext &C = CGM.getContext(); 4373 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4374 RD->startDefinition(); 4375 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4376 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4377 addFieldToRecordDecl(C, RD, C.getSizeType()); 4378 addFieldToRecordDecl( 4379 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4380 addFieldToRecordDecl( 4381 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4382 RD->completeDefinition(); 4383 RD->addAttr(PackedAttr::CreateImplicit(C)); 4384 TgtOffloadEntryQTy = C.getRecordType(RD); 4385 } 4386 return TgtOffloadEntryQTy; 4387 } 4388 4389 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4390 // These are the types we need to build: 4391 // struct __tgt_device_image{ 4392 // void *ImageStart; // Pointer to the target code start. 4393 // void *ImageEnd; // Pointer to the target code end. 4394 // // We also add the host entries to the device image, as it may be useful 4395 // // for the target runtime to have access to that information. 4396 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4397 // // the entries. 4398 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4399 // // entries (non inclusive). 
4400 // }; 4401 if (TgtDeviceImageQTy.isNull()) { 4402 ASTContext &C = CGM.getContext(); 4403 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4404 RD->startDefinition(); 4405 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4406 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4407 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4408 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4409 RD->completeDefinition(); 4410 TgtDeviceImageQTy = C.getRecordType(RD); 4411 } 4412 return TgtDeviceImageQTy; 4413 } 4414 4415 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4416 // struct __tgt_bin_desc{ 4417 // int32_t NumDevices; // Number of devices supported. 4418 // __tgt_device_image *DeviceImages; // Arrays of device images 4419 // // (one per device). 4420 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4421 // // entries. 4422 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4423 // // entries (non inclusive). 
4424 // }; 4425 if (TgtBinaryDescriptorQTy.isNull()) { 4426 ASTContext &C = CGM.getContext(); 4427 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4428 RD->startDefinition(); 4429 addFieldToRecordDecl( 4430 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4431 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4432 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4433 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4434 RD->completeDefinition(); 4435 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4436 } 4437 return TgtBinaryDescriptorQTy; 4438 } 4439 4440 namespace { 4441 struct PrivateHelpersTy { 4442 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4443 const VarDecl *PrivateElemInit) 4444 : Original(Original), PrivateCopy(PrivateCopy), 4445 PrivateElemInit(PrivateElemInit) {} 4446 const VarDecl *Original; 4447 const VarDecl *PrivateCopy; 4448 const VarDecl *PrivateElemInit; 4449 }; 4450 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4451 } // anonymous namespace 4452 4453 static RecordDecl * 4454 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4455 if (!Privates.empty()) { 4456 ASTContext &C = CGM.getContext(); 4457 // Build struct .kmp_privates_t. 
{ 4458 // /* private vars */ 4459 // }; 4460 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4461 RD->startDefinition(); 4462 for (const auto &Pair : Privates) { 4463 const VarDecl *VD = Pair.second.Original; 4464 QualType Type = VD->getType().getNonReferenceType(); 4465 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4466 if (VD->hasAttrs()) { 4467 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4468 E(VD->getAttrs().end()); 4469 I != E; ++I) 4470 FD->addAttr(*I); 4471 } 4472 } 4473 RD->completeDefinition(); 4474 return RD; 4475 } 4476 return nullptr; 4477 } 4478 4479 static RecordDecl * 4480 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4481 QualType KmpInt32Ty, 4482 QualType KmpRoutineEntryPointerQTy) { 4483 ASTContext &C = CGM.getContext(); 4484 // Build struct kmp_task_t { 4485 // void * shareds; 4486 // kmp_routine_entry_t routine; 4487 // kmp_int32 part_id; 4488 // kmp_cmplrdata_t data1; 4489 // kmp_cmplrdata_t data2; 4490 // For taskloops additional fields: 4491 // kmp_uint64 lb; 4492 // kmp_uint64 ub; 4493 // kmp_int64 st; 4494 // kmp_int32 liter; 4495 // void * reductions; 4496 // }; 4497 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4498 UD->startDefinition(); 4499 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4500 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4501 UD->completeDefinition(); 4502 QualType KmpCmplrdataTy = C.getRecordType(UD); 4503 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4504 RD->startDefinition(); 4505 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4506 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4507 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4508 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4509 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4510 if (isOpenMPTaskLoopDirective(Kind)) { 4511 QualType KmpUInt64Ty = 4512 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4513 QualType KmpInt64Ty = 4514 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4515 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4516 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4517 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4518 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4519 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4520 } 4521 RD->completeDefinition(); 4522 return RD; 4523 } 4524 4525 static RecordDecl * 4526 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4527 ArrayRef<PrivateDataTy> Privates) { 4528 ASTContext &C = CGM.getContext(); 4529 // Build struct kmp_task_t_with_privates { 4530 // kmp_task_t task_data; 4531 // .kmp_privates_t. privates; 4532 // }; 4533 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4534 RD->startDefinition(); 4535 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4536 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4537 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4538 RD->completeDefinition(); 4539 return RD; 4540 } 4541 4542 /// Emit a proxy function which accepts kmp_task_t as the second 4543 /// argument. 
4544 /// \code 4545 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4546 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4547 /// For taskloops: 4548 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4549 /// tt->reductions, tt->shareds); 4550 /// return 0; 4551 /// } 4552 /// \endcode 4553 static llvm::Function * 4554 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4555 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4556 QualType KmpTaskTWithPrivatesPtrQTy, 4557 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4558 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4559 llvm::Value *TaskPrivatesMap) { 4560 ASTContext &C = CGM.getContext(); 4561 FunctionArgList Args; 4562 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4563 ImplicitParamDecl::Other); 4564 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4565 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4566 ImplicitParamDecl::Other); 4567 Args.push_back(&GtidArg); 4568 Args.push_back(&TaskTypeArg); 4569 const auto &TaskEntryFnInfo = 4570 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4571 llvm::FunctionType *TaskEntryTy = 4572 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4573 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4574 auto *TaskEntry = llvm::Function::Create( 4575 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4576 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4577 TaskEntry->setDoesNotRecurse(); 4578 CodeGenFunction CGF(CGM); 4579 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4580 Loc, Loc); 4581 4582 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4583 // tt, 4584 // For taskloops: 4585 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4586 // 
tt->task_data.shareds); 4587 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4588 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4589 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4590 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4591 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4592 const auto *KmpTaskTWithPrivatesQTyRD = 4593 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4594 LValue Base = 4595 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4596 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4597 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4598 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4599 llvm::Value *PartidParam = PartIdLVal.getPointer(); 4600 4601 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4602 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4603 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4604 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4605 CGF.ConvertTypeForMem(SharedsPtrTy)); 4606 4607 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4608 llvm::Value *PrivatesParam; 4609 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4610 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4611 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4612 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 4613 } else { 4614 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4615 } 4616 4617 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4618 TaskPrivatesMap, 4619 CGF.Builder 4620 .CreatePointerBitCastOrAddrSpaceCast( 4621 TDBase.getAddress(), CGF.VoidPtrTy) 4622 .getPointer()}; 4623 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4624 std::end(CommonArgs)); 4625 if (isOpenMPTaskLoopDirective(Kind)) { 4626 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), 
KmpTaskTLowerBound); 4627 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4628 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4629 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4630 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4631 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4632 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4633 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4634 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4635 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4636 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4637 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4638 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4639 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4640 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4641 CallArgs.push_back(LBParam); 4642 CallArgs.push_back(UBParam); 4643 CallArgs.push_back(StParam); 4644 CallArgs.push_back(LIParam); 4645 CallArgs.push_back(RParam); 4646 } 4647 CallArgs.push_back(SharedsParam); 4648 4649 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4650 CallArgs); 4651 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4652 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4653 CGF.FinishFunction(); 4654 return TaskEntry; 4655 } 4656 4657 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4658 SourceLocation Loc, 4659 QualType KmpInt32Ty, 4660 QualType KmpTaskTWithPrivatesPtrQTy, 4661 QualType KmpTaskTWithPrivatesQTy) { 4662 ASTContext &C = CGM.getContext(); 4663 FunctionArgList Args; 4664 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4665 ImplicitParamDecl::Other); 4666 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4667 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4668 ImplicitParamDecl::Other); 4669 
Args.push_back(&GtidArg); 4670 Args.push_back(&TaskTypeArg); 4671 const auto &DestructorFnInfo = 4672 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4673 llvm::FunctionType *DestructorFnTy = 4674 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4675 std::string Name = 4676 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4677 auto *DestructorFn = 4678 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4679 Name, &CGM.getModule()); 4680 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4681 DestructorFnInfo); 4682 DestructorFn->setDoesNotRecurse(); 4683 CodeGenFunction CGF(CGM); 4684 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4685 Args, Loc, Loc); 4686 4687 LValue Base = CGF.EmitLoadOfPointerLValue( 4688 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4689 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4690 const auto *KmpTaskTWithPrivatesQTyRD = 4691 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4692 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4693 Base = CGF.EmitLValueForField(Base, *FI); 4694 for (const auto *Field : 4695 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4696 if (QualType::DestructionKind DtorKind = 4697 Field->getType().isDestructedType()) { 4698 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4699 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4700 } 4701 } 4702 CGF.FinishFunction(); 4703 return DestructorFn; 4704 } 4705 4706 /// Emit a privates mapping function for correct handling of private and 4707 /// firstprivate variables. 4708 /// \code 4709 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4710 /// **noalias priv1,..., <tyn> **noalias privn) { 4711 /// *priv1 = &.privates.priv1; 4712 /// ...; 4713 /// *privn = &.privates.privn; 4714 /// } 4715 /// \endcode 4716 static llvm::Value * 4717 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4718 ArrayRef<const Expr *> PrivateVars, 4719 ArrayRef<const Expr *> FirstprivateVars, 4720 ArrayRef<const Expr *> LastprivateVars, 4721 QualType PrivatesQTy, 4722 ArrayRef<PrivateDataTy> Privates) { 4723 ASTContext &C = CGM.getContext(); 4724 FunctionArgList Args; 4725 ImplicitParamDecl TaskPrivatesArg( 4726 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4727 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4728 ImplicitParamDecl::Other); 4729 Args.push_back(&TaskPrivatesArg); 4730 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4731 unsigned Counter = 1; 4732 for (const Expr *E : PrivateVars) { 4733 Args.push_back(ImplicitParamDecl::Create( 4734 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4735 C.getPointerType(C.getPointerType(E->getType())) 4736 .withConst() 4737 .withRestrict(), 4738 ImplicitParamDecl::Other)); 4739 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4740 PrivateVarsPos[VD] = Counter; 4741 ++Counter; 4742 } 4743 for (const Expr *E : FirstprivateVars) { 4744 Args.push_back(ImplicitParamDecl::Create( 4745 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4746 C.getPointerType(C.getPointerType(E->getType())) 4747 .withConst() 4748 .withRestrict(), 4749 ImplicitParamDecl::Other)); 4750 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4751 PrivateVarsPos[VD] = Counter; 4752 ++Counter; 4753 } 4754 for (const Expr *E : LastprivateVars) { 4755 Args.push_back(ImplicitParamDecl::Create( 4756 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4757 C.getPointerType(C.getPointerType(E->getType())) 4758 .withConst() 4759 .withRestrict(), 4760 ImplicitParamDecl::Other)); 4761 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4762 
PrivateVarsPos[VD] = Counter; 4763 ++Counter; 4764 } 4765 const auto &TaskPrivatesMapFnInfo = 4766 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4767 llvm::FunctionType *TaskPrivatesMapTy = 4768 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4769 std::string Name = 4770 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4771 auto *TaskPrivatesMap = llvm::Function::Create( 4772 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4773 &CGM.getModule()); 4774 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4775 TaskPrivatesMapFnInfo); 4776 if (CGM.getLangOpts().Optimize) { 4777 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4778 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4779 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4780 } 4781 CodeGenFunction CGF(CGM); 4782 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4783 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4784 4785 // *privi = &.privates.privi; 4786 LValue Base = CGF.EmitLoadOfPointerLValue( 4787 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4788 TaskPrivatesArg.getType()->castAs<PointerType>()); 4789 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4790 Counter = 0; 4791 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4792 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4793 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4794 LValue RefLVal = 4795 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4796 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4797 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4798 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4799 ++Counter; 4800 } 4801 CGF.FinishFunction(); 4802 return TaskPrivatesMap; 4803 } 4804 4805 /// Emit initialization for private variables in task-based directives. 
4806 static void emitPrivatesInit(CodeGenFunction &CGF, 4807 const OMPExecutableDirective &D, 4808 Address KmpTaskSharedsPtr, LValue TDBase, 4809 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4810 QualType SharedsTy, QualType SharedsPtrTy, 4811 const OMPTaskDataTy &Data, 4812 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4813 ASTContext &C = CGF.getContext(); 4814 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4815 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4816 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4817 ? OMPD_taskloop 4818 : OMPD_task; 4819 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4820 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4821 LValue SrcBase; 4822 bool IsTargetTask = 4823 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4824 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4825 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4826 // PointersArray and SizesArray. The original variables for these arrays are 4827 // not captured and we get their addresses explicitly. 
4828 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4829 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4830 SrcBase = CGF.MakeAddrLValue( 4831 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4832 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4833 SharedsTy); 4834 } 4835 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4836 for (const PrivateDataTy &Pair : Privates) { 4837 const VarDecl *VD = Pair.second.PrivateCopy; 4838 const Expr *Init = VD->getAnyInitializer(); 4839 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4840 !CGF.isTrivialInitializer(Init)))) { 4841 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4842 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4843 const VarDecl *OriginalVD = Pair.second.Original; 4844 // Check if the variable is the target-based BasePointersArray, 4845 // PointersArray or SizesArray. 4846 LValue SharedRefLValue; 4847 QualType Type = PrivateLValue.getType(); 4848 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4849 if (IsTargetTask && !SharedField) { 4850 assert(isa<ImplicitParamDecl>(OriginalVD) && 4851 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4852 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4853 ->getNumParams() == 0 && 4854 isa<TranslationUnitDecl>( 4855 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4856 ->getDeclContext()) && 4857 "Expected artificial target data variable."); 4858 SharedRefLValue = 4859 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4860 } else { 4861 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4862 SharedRefLValue = CGF.MakeAddrLValue( 4863 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 4864 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4865 SharedRefLValue.getTBAAInfo()); 4866 } 4867 if (Type->isArrayType()) { 4868 // Initialize firstprivate array. 
4869 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4870 // Perform simple memcpy. 4871 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4872 } else { 4873 // Initialize firstprivate array using element-by-element 4874 // initialization. 4875 CGF.EmitOMPAggregateAssign( 4876 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 4877 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4878 Address SrcElement) { 4879 // Clean up any temporaries needed by the initialization. 4880 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4881 InitScope.addPrivate( 4882 Elem, [SrcElement]() -> Address { return SrcElement; }); 4883 (void)InitScope.Privatize(); 4884 // Emit initialization for single element. 4885 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4886 CGF, &CapturesInfo); 4887 CGF.EmitAnyExprToMem(Init, DestElement, 4888 Init->getType().getQualifiers(), 4889 /*IsInitializer=*/false); 4890 }); 4891 } 4892 } else { 4893 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4894 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 4895 return SharedRefLValue.getAddress(); 4896 }); 4897 (void)InitScope.Privatize(); 4898 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4899 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4900 /*capturedByInit=*/false); 4901 } 4902 } else { 4903 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4904 } 4905 } 4906 ++FI; 4907 } 4908 } 4909 4910 /// Check if duplication function is required for taskloops. 
4911 static bool checkInitIsRequired(CodeGenFunction &CGF, 4912 ArrayRef<PrivateDataTy> Privates) { 4913 bool InitRequired = false; 4914 for (const PrivateDataTy &Pair : Privates) { 4915 const VarDecl *VD = Pair.second.PrivateCopy; 4916 const Expr *Init = VD->getAnyInitializer(); 4917 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4918 !CGF.isTrivialInitializer(Init)); 4919 if (InitRequired) 4920 break; 4921 } 4922 return InitRequired; 4923 } 4924 4925 4926 /// Emit task_dup function (for initialization of 4927 /// private/firstprivate/lastprivate vars and last_iter flag) 4928 /// \code 4929 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4930 /// lastpriv) { 4931 /// // setup lastprivate flag 4932 /// task_dst->last = lastpriv; 4933 /// // could be constructor calls here... 4934 /// } 4935 /// \endcode 4936 static llvm::Value * 4937 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4938 const OMPExecutableDirective &D, 4939 QualType KmpTaskTWithPrivatesPtrQTy, 4940 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4941 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4942 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4943 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4944 ASTContext &C = CGM.getContext(); 4945 FunctionArgList Args; 4946 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4947 KmpTaskTWithPrivatesPtrQTy, 4948 ImplicitParamDecl::Other); 4949 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4950 KmpTaskTWithPrivatesPtrQTy, 4951 ImplicitParamDecl::Other); 4952 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4953 ImplicitParamDecl::Other); 4954 Args.push_back(&DstArg); 4955 Args.push_back(&SrcArg); 4956 Args.push_back(&LastprivArg); 4957 const auto &TaskDupFnInfo = 4958 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4959 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4960 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4961 auto *TaskDup = llvm::Function::Create( 4962 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4963 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4964 TaskDup->setDoesNotRecurse(); 4965 CodeGenFunction CGF(CGM); 4966 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4967 Loc); 4968 4969 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4970 CGF.GetAddrOfLocalVar(&DstArg), 4971 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4972 // task_dst->liter = lastpriv; 4973 if (WithLastIter) { 4974 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4975 LValue Base = CGF.EmitLValueForField( 4976 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4977 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4978 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4979 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4980 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4981 } 4982 4983 // Emit initial values for private copies (if any). 4984 assert(!Privates.empty()); 4985 Address KmpTaskSharedsPtr = Address::invalid(); 4986 if (!Data.FirstprivateVars.empty()) { 4987 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4988 CGF.GetAddrOfLocalVar(&SrcArg), 4989 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4990 LValue Base = CGF.EmitLValueForField( 4991 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4992 KmpTaskSharedsPtr = Address( 4993 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4994 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4995 KmpTaskTShareds)), 4996 Loc), 4997 CGF.getNaturalTypeAlignment(SharedsTy)); 4998 } 4999 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 5000 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 5001 CGF.FinishFunction(); 5002 return TaskDup; 5003 } 5004 5005 /// Checks if destructor function is required to be generated. 
5006 /// \return true if cleanups are required, false otherwise. 5007 static bool 5008 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 5009 bool NeedsCleanup = false; 5010 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 5011 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 5012 for (const FieldDecl *FD : PrivateRD->fields()) { 5013 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 5014 if (NeedsCleanup) 5015 break; 5016 } 5017 return NeedsCleanup; 5018 } 5019 5020 CGOpenMPRuntime::TaskResultTy 5021 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 5022 const OMPExecutableDirective &D, 5023 llvm::Function *TaskFunction, QualType SharedsTy, 5024 Address Shareds, const OMPTaskDataTy &Data) { 5025 ASTContext &C = CGM.getContext(); 5026 llvm::SmallVector<PrivateDataTy, 4> Privates; 5027 // Aggregate privates and sort them by the alignment. 5028 auto I = Data.PrivateCopies.begin(); 5029 for (const Expr *E : Data.PrivateVars) { 5030 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5031 Privates.emplace_back( 5032 C.getDeclAlign(VD), 5033 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5034 /*PrivateElemInit=*/nullptr)); 5035 ++I; 5036 } 5037 I = Data.FirstprivateCopies.begin(); 5038 auto IElemInitRef = Data.FirstprivateInits.begin(); 5039 for (const Expr *E : Data.FirstprivateVars) { 5040 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5041 Privates.emplace_back( 5042 C.getDeclAlign(VD), 5043 PrivateHelpersTy( 5044 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5045 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 5046 ++I; 5047 ++IElemInitRef; 5048 } 5049 I = Data.LastprivateCopies.begin(); 5050 for (const Expr *E : Data.LastprivateVars) { 5051 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 5052 Privates.emplace_back( 5053 C.getDeclAlign(VD), 5054 PrivateHelpersTy(VD, 
cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 5055 /*PrivateElemInit=*/nullptr)); 5056 ++I; 5057 } 5058 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 5059 return L.first > R.first; 5060 }); 5061 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 5062 // Build type kmp_routine_entry_t (if not built yet). 5063 emitKmpRoutineEntryT(KmpInt32Ty); 5064 // Build type kmp_task_t (if not built yet). 5065 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 5066 if (SavedKmpTaskloopTQTy.isNull()) { 5067 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5068 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5069 } 5070 KmpTaskTQTy = SavedKmpTaskloopTQTy; 5071 } else { 5072 assert((D.getDirectiveKind() == OMPD_task || 5073 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 5074 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 5075 "Expected taskloop, task or target directive"); 5076 if (SavedKmpTaskTQTy.isNull()) { 5077 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5078 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5079 } 5080 KmpTaskTQTy = SavedKmpTaskTQTy; 5081 } 5082 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5083 // Build particular struct kmp_task_t for the given task. 
5084 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5085 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5086 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5087 QualType KmpTaskTWithPrivatesPtrQTy = 5088 C.getPointerType(KmpTaskTWithPrivatesQTy); 5089 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5090 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5091 KmpTaskTWithPrivatesTy->getPointerTo(); 5092 llvm::Value *KmpTaskTWithPrivatesTySize = 5093 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5094 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5095 5096 // Emit initial values for private copies (if any). 5097 llvm::Value *TaskPrivatesMap = nullptr; 5098 llvm::Type *TaskPrivatesMapTy = 5099 std::next(TaskFunction->arg_begin(), 3)->getType(); 5100 if (!Privates.empty()) { 5101 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5102 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5103 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5104 FI->getType(), Privates); 5105 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5106 TaskPrivatesMap, TaskPrivatesMapTy); 5107 } else { 5108 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5109 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5110 } 5111 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5112 // kmp_task_t *tt); 5113 llvm::Function *TaskEntry = emitProxyTaskFunction( 5114 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5115 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5116 TaskPrivatesMap); 5117 5118 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5119 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5120 // kmp_routine_entry_t *task_entry); 5121 // Task flags. 
Format is taken from 5122 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5123 // description of kmp_tasking_flags struct. 5124 enum { 5125 TiedFlag = 0x1, 5126 FinalFlag = 0x2, 5127 DestructorsFlag = 0x8, 5128 PriorityFlag = 0x20 5129 }; 5130 unsigned Flags = Data.Tied ? TiedFlag : 0; 5131 bool NeedsCleanup = false; 5132 if (!Privates.empty()) { 5133 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5134 if (NeedsCleanup) 5135 Flags = Flags | DestructorsFlag; 5136 } 5137 if (Data.Priority.getInt()) 5138 Flags = Flags | PriorityFlag; 5139 llvm::Value *TaskFlags = 5140 Data.Final.getPointer() 5141 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 5142 CGF.Builder.getInt32(FinalFlag), 5143 CGF.Builder.getInt32(/*C=*/0)) 5144 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5145 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5146 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5147 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 5148 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 5149 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5150 TaskEntry, KmpRoutineEntryPtrTy)}; 5151 llvm::Value *NewTask; 5152 if (D.hasClausesOfKind<OMPNowaitClause>()) { 5153 // Check if we have any device clause associated with the directive. 5154 const Expr *Device = nullptr; 5155 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 5156 Device = C->getDevice(); 5157 // Emit device ID if any otherwise use default value. 
5158 llvm::Value *DeviceID; 5159 if (Device) 5160 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5161 CGF.Int64Ty, /*isSigned=*/true); 5162 else 5163 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 5164 AllocArgs.push_back(DeviceID); 5165 NewTask = CGF.EmitRuntimeCall( 5166 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); 5167 } else { 5168 NewTask = CGF.EmitRuntimeCall( 5169 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5170 } 5171 llvm::Value *NewTaskNewTaskTTy = 5172 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5173 NewTask, KmpTaskTWithPrivatesPtrTy); 5174 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5175 KmpTaskTWithPrivatesQTy); 5176 LValue TDBase = 5177 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5178 // Fill the data in the resulting kmp_task_t record. 5179 // Copy shareds if there are any. 5180 Address KmpTaskSharedsPtr = Address::invalid(); 5181 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5182 KmpTaskSharedsPtr = 5183 Address(CGF.EmitLoadOfScalar( 5184 CGF.EmitLValueForField( 5185 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5186 KmpTaskTShareds)), 5187 Loc), 5188 CGF.getNaturalTypeAlignment(SharedsTy)); 5189 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5190 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5191 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5192 } 5193 // Emit initial values for private copies (if any). 
5194 TaskResultTy Result; 5195 if (!Privates.empty()) { 5196 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5197 SharedsTy, SharedsPtrTy, Data, Privates, 5198 /*ForDup=*/false); 5199 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5200 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5201 Result.TaskDupFn = emitTaskDupFunction( 5202 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5203 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5204 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5205 } 5206 } 5207 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5208 enum { Priority = 0, Destructors = 1 }; 5209 // Provide pointer to function with destructors for privates. 5210 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5211 const RecordDecl *KmpCmplrdataUD = 5212 (*FI)->getType()->getAsUnionType()->getDecl(); 5213 if (NeedsCleanup) { 5214 llvm::Value *DestructorFn = emitDestructorsFunction( 5215 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5216 KmpTaskTWithPrivatesQTy); 5217 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5218 LValue DestructorsLV = CGF.EmitLValueForField( 5219 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5220 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5221 DestructorFn, KmpRoutineEntryPtrTy), 5222 DestructorsLV); 5223 } 5224 // Set priority. 
5225 if (Data.Priority.getInt()) { 5226 LValue Data2LV = CGF.EmitLValueForField( 5227 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5228 LValue PriorityLV = CGF.EmitLValueForField( 5229 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5230 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5231 } 5232 Result.NewTask = NewTask; 5233 Result.TaskEntry = TaskEntry; 5234 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5235 Result.TDBase = TDBase; 5236 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5237 return Result; 5238 } 5239 5240 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5241 const OMPExecutableDirective &D, 5242 llvm::Function *TaskFunction, 5243 QualType SharedsTy, Address Shareds, 5244 const Expr *IfCond, 5245 const OMPTaskDataTy &Data) { 5246 if (!CGF.HaveInsertPoint()) 5247 return; 5248 5249 TaskResultTy Result = 5250 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5251 llvm::Value *NewTask = Result.NewTask; 5252 llvm::Function *TaskEntry = Result.TaskEntry; 5253 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5254 LValue TDBase = Result.TDBase; 5255 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5256 ASTContext &C = CGM.getContext(); 5257 // Process list of dependences. 5258 Address DependenciesArray = Address::invalid(); 5259 unsigned NumDependencies = Data.Dependences.size(); 5260 if (NumDependencies) { 5261 // Dependence kind for RTL. 
5262 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5263 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5264 RecordDecl *KmpDependInfoRD; 5265 QualType FlagsTy = 5266 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5267 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5268 if (KmpDependInfoTy.isNull()) { 5269 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5270 KmpDependInfoRD->startDefinition(); 5271 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5272 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5273 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5274 KmpDependInfoRD->completeDefinition(); 5275 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5276 } else { 5277 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5278 } 5279 // Define type kmp_depend_info[<Dependences.size()>]; 5280 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5281 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5282 ArrayType::Normal, /*IndexTypeQuals=*/0); 5283 // kmp_depend_info[<Dependences.size()>] deps; 5284 DependenciesArray = 5285 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5286 for (unsigned I = 0; I < NumDependencies; ++I) { 5287 const Expr *E = Data.Dependences[I].second; 5288 LValue Addr = CGF.EmitLValue(E); 5289 llvm::Value *Size; 5290 QualType Ty = E->getType(); 5291 if (const auto *ASE = 5292 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5293 LValue UpAddrLVal = 5294 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 5295 llvm::Value *UpAddr = 5296 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 5297 llvm::Value *LowIntPtr = 5298 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 5299 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5300 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5301 } else { 5302 Size = CGF.getTypeSize(Ty); 5303 
} 5304 LValue Base = CGF.MakeAddrLValue( 5305 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5306 KmpDependInfoTy); 5307 // deps[i].base_addr = &<Dependences[i].second>; 5308 LValue BaseAddrLVal = CGF.EmitLValueForField( 5309 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5310 CGF.EmitStoreOfScalar( 5311 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 5312 BaseAddrLVal); 5313 // deps[i].len = sizeof(<Dependences[i].second>); 5314 LValue LenLVal = CGF.EmitLValueForField( 5315 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5316 CGF.EmitStoreOfScalar(Size, LenLVal); 5317 // deps[i].flags = <Dependences[i].first>; 5318 RTLDependenceKindTy DepKind; 5319 switch (Data.Dependences[I].first) { 5320 case OMPC_DEPEND_in: 5321 DepKind = DepIn; 5322 break; 5323 // Out and InOut dependencies must use the same code. 5324 case OMPC_DEPEND_out: 5325 case OMPC_DEPEND_inout: 5326 DepKind = DepInOut; 5327 break; 5328 case OMPC_DEPEND_mutexinoutset: 5329 DepKind = DepMutexInOutSet; 5330 break; 5331 case OMPC_DEPEND_source: 5332 case OMPC_DEPEND_sink: 5333 case OMPC_DEPEND_unknown: 5334 llvm_unreachable("Unknown task dependence type"); 5335 } 5336 LValue FlagsLVal = CGF.EmitLValueForField( 5337 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5338 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5339 FlagsLVal); 5340 } 5341 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5342 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); 5343 } 5344 5345 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5346 // libcall. 
5347 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5348 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5349 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5350 // list is not empty 5351 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5352 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5353 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5354 llvm::Value *DepTaskArgs[7]; 5355 if (NumDependencies) { 5356 DepTaskArgs[0] = UpLoc; 5357 DepTaskArgs[1] = ThreadID; 5358 DepTaskArgs[2] = NewTask; 5359 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 5360 DepTaskArgs[4] = DependenciesArray.getPointer(); 5361 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5362 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5363 } 5364 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 5365 &TaskArgs, 5366 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5367 if (!Data.Tied) { 5368 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5369 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5370 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5371 } 5372 if (NumDependencies) { 5373 CGF.EmitRuntimeCall( 5374 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5375 } else { 5376 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5377 TaskArgs); 5378 } 5379 // Check if parent region is untied and build return for untied task; 5380 if (auto *Region = 5381 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5382 Region->emitUntiedSwitch(CGF); 5383 }; 5384 5385 llvm::Value *DepWaitTaskArgs[6]; 5386 if (NumDependencies) { 5387 DepWaitTaskArgs[0] = UpLoc; 5388 DepWaitTaskArgs[1] = ThreadID; 5389 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 5390 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5391 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5392 
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
                        NumDependencies, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (NumDependencies)
      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
        RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // Without an 'if' clause only the "then" variant is emitted.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

/// Emit the runtime call for a taskloop-based directive.
/// Creates the task via emitTaskInit(), stores the initializers of the
/// directive's lower bound, upper bound and stride variables plus the
/// reductions pointer into the corresponding fields of the task record, and
/// then emits a call to __kmpc_taskloop.
/// \param D Loop directive that provides the bound and stride variables.
/// \param TaskFunction, SharedsTy, Shareds, Data Forwarded to emitTaskInit().
/// \param IfCond Optional 'if' clause condition; when absent the if_val
/// argument of the runtime call is the constant 1.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: the 'if' condition converted to int, or 1 if there is no clause.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the task record from the initializer
  // of the directive's lower bound variable.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    // No reductions: the field is null-initialized.
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  // Values passed as the 'sched' argument of __kmpc_taskloop.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  // Arguments are listed in the order of the __kmpc_taskloop signature quoted
  // above: loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize,
  // task_dup.
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(),
      UBLVal.getPointer(),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // sched: NoSchedule when no schedule value is present; otherwise the
      // integer flag of Data.Schedule selects NumTasks over Grainsize.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      // grainsize: the schedule value cast to a 64-bit integer, or 0.
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      // task_dup: the task duplication function as void *, or null.
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions that are passed through
/// unchanged to \p RedOpGen.
5525 static void EmitOMPAggregateReduction( 5526 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5527 const VarDecl *RHSVar, 5528 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5529 const Expr *, const Expr *)> &RedOpGen, 5530 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5531 const Expr *UpExpr = nullptr) { 5532 // Perform element-by-element initialization. 5533 QualType ElementTy; 5534 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5535 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5536 5537 // Drill down to the base element type on both arrays. 5538 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5539 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5540 5541 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5542 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5543 // Cast from pointer to array type to pointer to single element. 5544 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5545 // The basic structure here is a while-do loop. 5546 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5547 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5548 llvm::Value *IsEmpty = 5549 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5550 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5551 5552 // Enter the loop body, making that address the current address. 
5553 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5554 CGF.EmitBlock(BodyBB); 5555 5556 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5557 5558 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5559 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5560 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5561 Address RHSElementCurrent = 5562 Address(RHSElementPHI, 5563 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5564 5565 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5566 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5567 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5568 Address LHSElementCurrent = 5569 Address(LHSElementPHI, 5570 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5571 5572 // Emit copy. 5573 CodeGenFunction::OMPPrivateScope Scope(CGF); 5574 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5575 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5576 Scope.Privatize(); 5577 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5578 Scope.ForceCleanup(); 5579 5580 // Shift the address forward by one element. 5581 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5582 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5583 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5584 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5585 // Check whether we've reached the end. 5586 llvm::Value *Done = 5587 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5588 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5589 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5590 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5591 5592 // Done. 5593 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5594 } 5595 5596 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
/// \param ReductionOp Reduction operation expression to emit; its result is
/// ignored.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize a call whose callee is an opaque value wrapping a reference to
  // an OMPDeclareReductionDecl.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          // Bind the opaque callee to the first function of the pair while
          // the call expression is emitted.
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emit the internal-linkage function
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg);
/// \endcode
/// that applies every reduction operation to the items addressed by the two
/// argument arrays.
/// \param ArgsType LLVM type both arguments are cast to before use.
/// \param Privates Private copies; their types decide whether an item is
/// variably modified or an array.
/// \param LHSExprs, RHSExprs References to the variables that are remapped to
/// the entries of the LHS and RHS argument arrays.
/// \param ReductionOps Reduction operations, one per item.
/// \return The generated function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the argument arrays; it runs ahead of I because each
  // variably modified item occupies one extra slot holding its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // The size travels in the slot as a pointer-typed value; convert it
      // back to an integer and bind it to the VLA size expression.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the combiner of every item.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emit the combiner for a single reduction item: element by element when the
/// type of \p PrivateRef is an array type, directly otherwise.
/// \param ReductionOp Reduction operation to emit.
/// \param PrivateRef Expression whose type selects the array or scalar path.
/// \param LHS, RHS References to the variables used by \p ReductionOp; only
/// consulted on the array path.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emit the code of a reduction clause. With Options.SimpleReduction set only
/// the combiners are emitted inline. Otherwise the reduction list is built,
/// reduce_func is generated and the result of __kmpc_reduce{_nowait} is
/// dispatched through a switch, as sketched in the comment at the top of the
/// body.
/// \param Privates, LHSExprs, RHSExprs, ReductionOps Parallel lists with one
/// entry per reduction item.
/// \param Options Supplies the WithNowait and SimpleReduction flags.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it advances an extra step for every variably
  // modified item, matching the layout read back in emitReductionFunction().
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      // Note: this local intentionally shadows the outer slot count 'Size'.
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // No enter callee (nullptr); the end-reduce runtime function is passed as
  // the exit callee together with EndArgs.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose 'x = <update>' into X, the update expression and, when the
      // update is a binary operator, its right operand and opcode.
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Note: the pointer 'BO' declared in the condition shadows the opcode
      // 'BO' above inside this if statement only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback passed last stores the supplied value of X into a
          // temporary, remaps VD to it and evaluates UpExpr.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    // Invoked inside this scope, where Action is still alive.
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
/// <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
/// \param Prefix Text written first into the generated name.
/// \param Ref Expression that refers to the variable the name is derived from.
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  // If getBaseDecl() yields no declaration, Ref itself must be a DeclRefExpr
  // to a variable.
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  // Local variables and parameters use their plain name, everything else the
  // mangled name.
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return Out.str();
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper that emits the type and the
/// initialization of the item.
/// \param N Index of the reduction item inside \p RCG.
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Address of the private item, loaded from the function argument.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  LValue SharedLVal;
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr =
        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
            CGF, CGM.getContext().VoidPtrTy,
            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    SharedAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
  } else {
    // Otherwise a null pointer stands in for the shared item.
    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGM.getContext().VoidPtrTy);
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper used to emit the type of the item.
/// \param N Index of the reduction item inside \p RCG.
/// \param ReductionOp Reduction operation emitted as the function body.
/// \param LHS, RHS References to the variables that are remapped to the first
/// and second function argument respectively.
/// \param PrivateRef Expression whose type selects the array or scalar form of
/// the combiner.
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamInOut),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
  });
  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
    // Pull out the pointer to the variable.
    Address PtrAddr = CGF.EmitLoadOfPointer(
        CGF.GetAddrOfLocalVar(&ParamIn),
        C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
    return CGF.Builder.CreateElementBitCast(
        PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
  });
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// \param RCG Reduction codegen helper that emits the type and the cleanups of
/// the item.
/// \param N Index of the reduction item inside \p RCG.
/// \return The generated function, or nullptr if RCG.needCleanups(N) is false.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Address of the private item, loaded from the function argument.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction();
  return Fn;
}

/// Build the array of kmp_task_red_input_t descriptors, one per entry of
/// Data.ReductionVars, and emit the call to __kmpc_task_reduction_init.
/// \param LHSExprs, RHSExprs Per-item variable references handed to the
/// generated combiner functions.
/// \param Data Supplies ReductionVars, ReductionCopies and ReductionOps.
/// \return Result of the runtime call, or nullptr if there is no insert point
/// or no reduction variables.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_task_red_input {
  //   void *reduce_shar; // shared reduction item
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_task_red_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
                       Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    // ElemLVal.reduce_size = size of the item in chars, as size_t.
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null finalizer is stored when no cleanup function was generated.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 1 when delayed creation is used, 0 otherwise;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second = nullptr).
6357 if (Sizes.second) { 6358 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, 6359 /*isSigned=*/false); 6360 Address SizeAddr = getAddrOfArtificialThreadPrivate( 6361 CGF, CGM.getContext().getSizeType(), 6362 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6363 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); 6364 } 6365 // Store address of the original reduction item if custom initializer is used. 6366 if (RCG.usesReductionInitializer(N)) { 6367 Address SharedAddr = getAddrOfArtificialThreadPrivate( 6368 CGF, CGM.getContext().VoidPtrTy, 6369 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6370 CGF.Builder.CreateStore( 6371 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 6372 RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy), 6373 SharedAddr, /*IsVolatile=*/false); 6374 } 6375 } 6376 6377 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, 6378 SourceLocation Loc, 6379 llvm::Value *ReductionsPtr, 6380 LValue SharedLVal) { 6381 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 6382 // *d); 6383 llvm::Value *Args[] = { 6384 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, 6385 /*isSigned=*/true), 6386 ReductionsPtr, 6387 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(), 6388 CGM.VoidPtrTy)}; 6389 return Address( 6390 CGF.EmitRuntimeCall( 6391 createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args), 6392 SharedLVal.getAlignment()); 6393 } 6394 6395 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 6396 SourceLocation Loc) { 6397 if (!CGF.HaveInsertPoint()) 6398 return; 6399 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 6400 // global_tid); 6401 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 6402 // Ignore return result until untied tasks are supported. 
6403 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); 6404 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 6405 Region->emitUntiedSwitch(CGF); 6406 } 6407 6408 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, 6409 OpenMPDirectiveKind InnerKind, 6410 const RegionCodeGenTy &CodeGen, 6411 bool HasCancel) { 6412 if (!CGF.HaveInsertPoint()) 6413 return; 6414 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel); 6415 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); 6416 } 6417 6418 namespace { 6419 enum RTCancelKind { 6420 CancelNoreq = 0, 6421 CancelParallel = 1, 6422 CancelLoop = 2, 6423 CancelSections = 3, 6424 CancelTaskgroup = 4 6425 }; 6426 } // anonymous namespace 6427 6428 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) { 6429 RTCancelKind CancelKind = CancelNoreq; 6430 if (CancelRegion == OMPD_parallel) 6431 CancelKind = CancelParallel; 6432 else if (CancelRegion == OMPD_for) 6433 CancelKind = CancelLoop; 6434 else if (CancelRegion == OMPD_sections) 6435 CancelKind = CancelSections; 6436 else { 6437 assert(CancelRegion == OMPD_taskgroup); 6438 CancelKind = CancelTaskgroup; 6439 } 6440 return CancelKind; 6441 } 6442 6443 void CGOpenMPRuntime::emitCancellationPointCall( 6444 CodeGenFunction &CGF, SourceLocation Loc, 6445 OpenMPDirectiveKind CancelRegion) { 6446 if (!CGF.HaveInsertPoint()) 6447 return; 6448 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 6449 // global_tid, kmp_int32 cncl_kind); 6450 if (auto *OMPRegionInfo = 6451 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6452 // For 'cancellation point taskgroup', the task region info may not have a 6453 // cancel. This may instead happen in another adjacent task. 
6454 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) { 6455 llvm::Value *Args[] = { 6456 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 6457 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6458 // Ignore return result until untied tasks are supported. 6459 llvm::Value *Result = CGF.EmitRuntimeCall( 6460 createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args); 6461 // if (__kmpc_cancellationpoint()) { 6462 // exit from construct; 6463 // } 6464 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6465 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6466 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6467 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6468 CGF.EmitBlock(ExitBB); 6469 // exit from construct; 6470 CodeGenFunction::JumpDest CancelDest = 6471 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6472 CGF.EmitBranchThroughCleanup(CancelDest); 6473 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6474 } 6475 } 6476 } 6477 6478 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, 6479 const Expr *IfCond, 6480 OpenMPDirectiveKind CancelRegion) { 6481 if (!CGF.HaveInsertPoint()) 6482 return; 6483 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 6484 // kmp_int32 cncl_kind); 6485 if (auto *OMPRegionInfo = 6486 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 6487 auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF, 6488 PrePostActionTy &) { 6489 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 6490 llvm::Value *Args[] = { 6491 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), 6492 CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; 6493 // Ignore return result until untied tasks are supported. 
6494 llvm::Value *Result = CGF.EmitRuntimeCall( 6495 RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); 6496 // if (__kmpc_cancel()) { 6497 // exit from construct; 6498 // } 6499 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 6500 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 6501 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 6502 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 6503 CGF.EmitBlock(ExitBB); 6504 // exit from construct; 6505 CodeGenFunction::JumpDest CancelDest = 6506 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 6507 CGF.EmitBranchThroughCleanup(CancelDest); 6508 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 6509 }; 6510 if (IfCond) { 6511 emitOMPIfClause(CGF, IfCond, ThenGen, 6512 [](CodeGenFunction &, PrePostActionTy &) {}); 6513 } else { 6514 RegionCodeGenTy ThenRCG(ThenGen); 6515 ThenRCG(CGF); 6516 } 6517 } 6518 } 6519 6520 void CGOpenMPRuntime::emitTargetOutlinedFunction( 6521 const OMPExecutableDirective &D, StringRef ParentName, 6522 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6523 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6524 assert(!ParentName.empty() && "Invalid target region parent name!"); 6525 HasEmittedTargetRegion = true; 6526 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, 6527 IsOffloadEntry, CodeGen); 6528 } 6529 6530 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( 6531 const OMPExecutableDirective &D, StringRef ParentName, 6532 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 6533 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 6534 // Create a unique name for the entry function using the source location 6535 // information of the current target region. 
The name will be something like: 6536 // 6537 // __omp_offloading_DD_FFFF_PP_lBB 6538 // 6539 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the 6540 // mangled name of the function that encloses the target region and BB is the 6541 // line number of the target region. 6542 6543 unsigned DeviceID; 6544 unsigned FileID; 6545 unsigned Line; 6546 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, 6547 Line); 6548 SmallString<64> EntryFnName; 6549 { 6550 llvm::raw_svector_ostream OS(EntryFnName); 6551 OS << "__omp_offloading" << llvm::format("_%x", DeviceID) 6552 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; 6553 } 6554 6555 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 6556 6557 CodeGenFunction CGF(CGM, true); 6558 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); 6559 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6560 6561 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS); 6562 6563 // If this target outline function is not an offload entry, we don't need to 6564 // register it. 6565 if (!IsOffloadEntry) 6566 return; 6567 6568 // The target region ID is used by the runtime library to identify the current 6569 // target region, so it only has to be unique and not necessarily point to 6570 // anything. It could be the pointer to the outlined function that implements 6571 // the target region, but we aren't using that so that the compiler doesn't 6572 // need to keep that, and could therefore inline the host function if proven 6573 // worthwhile during optimization. In the other hand, if emitting code for the 6574 // device, the ID has to be the function address so that it can retrieved from 6575 // the offloading entry and launched by the runtime library. We also mark the 6576 // outlined function to have external linkage in case we are emitting code for 6577 // the device, because these functions will be entry points to the device. 
6578 6579 if (CGM.getLangOpts().OpenMPIsDevice) { 6580 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); 6581 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 6582 OutlinedFn->setDSOLocal(false); 6583 } else { 6584 std::string Name = getName({EntryFnName, "region_id"}); 6585 OutlinedFnID = new llvm::GlobalVariable( 6586 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 6587 llvm::GlobalValue::WeakAnyLinkage, 6588 llvm::Constant::getNullValue(CGM.Int8Ty), Name); 6589 } 6590 6591 // Register the information for the entry associated with this target region. 6592 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 6593 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, 6594 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); 6595 } 6596 6597 /// Checks if the expression is constant or does not have non-trivial function 6598 /// calls. 6599 static bool isTrivial(ASTContext &Ctx, const Expr * E) { 6600 // We can skip constant expressions. 6601 // We can skip expressions with trivial calls or simple expressions. 6602 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || 6603 !E->hasNonTrivialCall(Ctx)) && 6604 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); 6605 } 6606 6607 const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, 6608 const Stmt *Body) { 6609 const Stmt *Child = Body->IgnoreContainers(); 6610 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) { 6611 Child = nullptr; 6612 for (const Stmt *S : C->body()) { 6613 if (const auto *E = dyn_cast<Expr>(S)) { 6614 if (isTrivial(Ctx, E)) 6615 continue; 6616 } 6617 // Some of the statements can be ignored. 6618 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) || 6619 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S)) 6620 continue; 6621 // Analyze declarations. 
6622 if (const auto *DS = dyn_cast<DeclStmt>(S)) { 6623 if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { 6624 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || 6625 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || 6626 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || 6627 isa<UsingDirectiveDecl>(D) || 6628 isa<OMPDeclareReductionDecl>(D) || 6629 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D)) 6630 return true; 6631 const auto *VD = dyn_cast<VarDecl>(D); 6632 if (!VD) 6633 return false; 6634 return VD->isConstexpr() || 6635 ((VD->getType().isTrivialType(Ctx) || 6636 VD->getType()->isReferenceType()) && 6637 (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); 6638 })) 6639 continue; 6640 } 6641 // Found multiple children - cannot get the one child only. 6642 if (Child) 6643 return nullptr; 6644 Child = S; 6645 } 6646 if (Child) 6647 Child = Child->IgnoreContainers(); 6648 } 6649 return Child; 6650 } 6651 6652 /// Emit the number of teams for a target directive. Inspect the num_teams 6653 /// clause associated with a teams construct combined or closely nested 6654 /// with the target directive. 6655 /// 6656 /// Emit a team of size one for directives such as 'target parallel' that 6657 /// have no associated teams construct. 6658 /// 6659 /// Otherwise, return nullptr. 
6660 static llvm::Value * 6661 emitNumTeamsForTargetDirective(CodeGenFunction &CGF, 6662 const OMPExecutableDirective &D) { 6663 assert(!CGF.getLangOpts().OpenMPIsDevice && 6664 "Clauses associated with the teams directive expected to be emitted " 6665 "only for the host!"); 6666 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6667 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6668 "Expected target-based executable directive."); 6669 CGBuilderTy &Bld = CGF.Builder; 6670 switch (DirectiveKind) { 6671 case OMPD_target: { 6672 const auto *CS = D.getInnermostCapturedStmt(); 6673 const auto *Body = 6674 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 6675 const Stmt *ChildStmt = 6676 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); 6677 if (const auto *NestedDir = 6678 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 6679 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { 6680 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { 6681 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6682 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6683 const Expr *NumTeams = 6684 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6685 llvm::Value *NumTeamsVal = 6686 CGF.EmitScalarExpr(NumTeams, 6687 /*IgnoreResultAssign*/ true); 6688 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6689 /*isSigned=*/true); 6690 } 6691 return Bld.getInt32(0); 6692 } 6693 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || 6694 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) 6695 return Bld.getInt32(1); 6696 return Bld.getInt32(0); 6697 } 6698 return nullptr; 6699 } 6700 case OMPD_target_teams: 6701 case OMPD_target_teams_distribute: 6702 case OMPD_target_teams_distribute_simd: 6703 case OMPD_target_teams_distribute_parallel_for: 6704 case OMPD_target_teams_distribute_parallel_for_simd: { 6705 if (D.hasClausesOfKind<OMPNumTeamsClause>()) { 6706 CodeGenFunction::RunCleanupsScope 
NumTeamsScope(CGF); 6707 const Expr *NumTeams = 6708 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); 6709 llvm::Value *NumTeamsVal = 6710 CGF.EmitScalarExpr(NumTeams, 6711 /*IgnoreResultAssign*/ true); 6712 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, 6713 /*isSigned=*/true); 6714 } 6715 return Bld.getInt32(0); 6716 } 6717 case OMPD_target_parallel: 6718 case OMPD_target_parallel_for: 6719 case OMPD_target_parallel_for_simd: 6720 case OMPD_target_simd: 6721 return Bld.getInt32(1); 6722 case OMPD_parallel: 6723 case OMPD_for: 6724 case OMPD_parallel_for: 6725 case OMPD_parallel_sections: 6726 case OMPD_for_simd: 6727 case OMPD_parallel_for_simd: 6728 case OMPD_cancel: 6729 case OMPD_cancellation_point: 6730 case OMPD_ordered: 6731 case OMPD_threadprivate: 6732 case OMPD_allocate: 6733 case OMPD_task: 6734 case OMPD_simd: 6735 case OMPD_sections: 6736 case OMPD_section: 6737 case OMPD_single: 6738 case OMPD_master: 6739 case OMPD_critical: 6740 case OMPD_taskyield: 6741 case OMPD_barrier: 6742 case OMPD_taskwait: 6743 case OMPD_taskgroup: 6744 case OMPD_atomic: 6745 case OMPD_flush: 6746 case OMPD_teams: 6747 case OMPD_target_data: 6748 case OMPD_target_exit_data: 6749 case OMPD_target_enter_data: 6750 case OMPD_distribute: 6751 case OMPD_distribute_simd: 6752 case OMPD_distribute_parallel_for: 6753 case OMPD_distribute_parallel_for_simd: 6754 case OMPD_teams_distribute: 6755 case OMPD_teams_distribute_simd: 6756 case OMPD_teams_distribute_parallel_for: 6757 case OMPD_teams_distribute_parallel_for_simd: 6758 case OMPD_target_update: 6759 case OMPD_declare_simd: 6760 case OMPD_declare_target: 6761 case OMPD_end_declare_target: 6762 case OMPD_declare_reduction: 6763 case OMPD_declare_mapper: 6764 case OMPD_taskloop: 6765 case OMPD_taskloop_simd: 6766 case OMPD_requires: 6767 case OMPD_unknown: 6768 break; 6769 } 6770 llvm_unreachable("Unexpected directive kind."); 6771 } 6772 6773 static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt 
*CS, 6774 llvm::Value *DefaultThreadLimitVal) { 6775 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6776 CGF.getContext(), CS->getCapturedStmt()); 6777 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6778 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { 6779 llvm::Value *NumThreads = nullptr; 6780 llvm::Value *CondVal = nullptr; 6781 // Handle if clause. If if clause present, the number of threads is 6782 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6783 if (Dir->hasClausesOfKind<OMPIfClause>()) { 6784 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6785 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6786 const OMPIfClause *IfClause = nullptr; 6787 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { 6788 if (C->getNameModifier() == OMPD_unknown || 6789 C->getNameModifier() == OMPD_parallel) { 6790 IfClause = C; 6791 break; 6792 } 6793 } 6794 if (IfClause) { 6795 const Expr *Cond = IfClause->getCondition(); 6796 bool Result; 6797 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6798 if (!Result) 6799 return CGF.Builder.getInt32(1); 6800 } else { 6801 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); 6802 if (const auto *PreInit = 6803 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { 6804 for (const auto *I : PreInit->decls()) { 6805 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6806 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6807 } else { 6808 CodeGenFunction::AutoVarEmission Emission = 6809 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6810 CGF.EmitAutoVarCleanups(Emission); 6811 } 6812 } 6813 } 6814 CondVal = CGF.EvaluateExprAsBool(Cond); 6815 } 6816 } 6817 } 6818 // Check the value of num_threads clause iff if clause was not specified 6819 // or is not evaluated to false. 
6820 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { 6821 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6822 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6823 const auto *NumThreadsClause = 6824 Dir->getSingleClause<OMPNumThreadsClause>(); 6825 CodeGenFunction::LexicalScope Scope( 6826 CGF, NumThreadsClause->getNumThreads()->getSourceRange()); 6827 if (const auto *PreInit = 6828 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { 6829 for (const auto *I : PreInit->decls()) { 6830 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6831 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6832 } else { 6833 CodeGenFunction::AutoVarEmission Emission = 6834 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6835 CGF.EmitAutoVarCleanups(Emission); 6836 } 6837 } 6838 } 6839 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); 6840 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, 6841 /*isSigned=*/false); 6842 if (DefaultThreadLimitVal) 6843 NumThreads = CGF.Builder.CreateSelect( 6844 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), 6845 DefaultThreadLimitVal, NumThreads); 6846 } else { 6847 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal 6848 : CGF.Builder.getInt32(0); 6849 } 6850 // Process condition of the if clause. 6851 if (CondVal) { 6852 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, 6853 CGF.Builder.getInt32(1)); 6854 } 6855 return NumThreads; 6856 } 6857 if (isOpenMPSimdDirective(Dir->getDirectiveKind())) 6858 return CGF.Builder.getInt32(1); 6859 return DefaultThreadLimitVal; 6860 } 6861 return DefaultThreadLimitVal ? DefaultThreadLimitVal 6862 : CGF.Builder.getInt32(0); 6863 } 6864 6865 /// Emit the number of threads for a target directive. Inspect the 6866 /// thread_limit clause associated with a teams construct combined or closely 6867 /// nested with the target directive. 
6868 /// 6869 /// Emit the num_threads clause for directives such as 'target parallel' that 6870 /// have no associated teams construct. 6871 /// 6872 /// Otherwise, return nullptr. 6873 static llvm::Value * 6874 emitNumThreadsForTargetDirective(CodeGenFunction &CGF, 6875 const OMPExecutableDirective &D) { 6876 assert(!CGF.getLangOpts().OpenMPIsDevice && 6877 "Clauses associated with the teams directive expected to be emitted " 6878 "only for the host!"); 6879 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); 6880 assert(isOpenMPTargetExecutionDirective(DirectiveKind) && 6881 "Expected target-based executable directive."); 6882 CGBuilderTy &Bld = CGF.Builder; 6883 llvm::Value *ThreadLimitVal = nullptr; 6884 llvm::Value *NumThreadsVal = nullptr; 6885 switch (DirectiveKind) { 6886 case OMPD_target: { 6887 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6888 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6889 return NumThreads; 6890 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6891 CGF.getContext(), CS->getCapturedStmt()); 6892 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6893 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { 6894 CGOpenMPInnerExprInfo CGInfo(CGF, *CS); 6895 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 6896 const auto *ThreadLimitClause = 6897 Dir->getSingleClause<OMPThreadLimitClause>(); 6898 CodeGenFunction::LexicalScope Scope( 6899 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); 6900 if (const auto *PreInit = 6901 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { 6902 for (const auto *I : PreInit->decls()) { 6903 if (!I->hasAttr<OMPCaptureNoInitAttr>()) { 6904 CGF.EmitVarDecl(cast<VarDecl>(*I)); 6905 } else { 6906 CodeGenFunction::AutoVarEmission Emission = 6907 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); 6908 CGF.EmitAutoVarCleanups(Emission); 6909 } 6910 } 6911 } 6912 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6913 
ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6914 ThreadLimitVal = 6915 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6916 } 6917 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 6918 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { 6919 CS = Dir->getInnermostCapturedStmt(); 6920 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6921 CGF.getContext(), CS->getCapturedStmt()); 6922 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); 6923 } 6924 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && 6925 !isOpenMPSimdDirective(Dir->getDirectiveKind())) { 6926 CS = Dir->getInnermostCapturedStmt(); 6927 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6928 return NumThreads; 6929 } 6930 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) 6931 return Bld.getInt32(1); 6932 } 6933 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); 6934 } 6935 case OMPD_target_teams: { 6936 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6937 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6938 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6939 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6940 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6941 ThreadLimitVal = 6942 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6943 } 6944 const CapturedStmt *CS = D.getInnermostCapturedStmt(); 6945 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6946 return NumThreads; 6947 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( 6948 CGF.getContext(), CS->getCapturedStmt()); 6949 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { 6950 if (Dir->getDirectiveKind() == OMPD_distribute) { 6951 CS = Dir->getInnermostCapturedStmt(); 6952 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) 6953 return NumThreads; 6954 } 6955 } 6956 return ThreadLimitVal ? 
ThreadLimitVal : Bld.getInt32(0); 6957 } 6958 case OMPD_target_teams_distribute: 6959 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6960 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6961 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 6962 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 6963 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 6964 ThreadLimitVal = 6965 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); 6966 } 6967 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); 6968 case OMPD_target_parallel: 6969 case OMPD_target_parallel_for: 6970 case OMPD_target_parallel_for_simd: 6971 case OMPD_target_teams_distribute_parallel_for: 6972 case OMPD_target_teams_distribute_parallel_for_simd: { 6973 llvm::Value *CondVal = nullptr; 6974 // Handle if clause. If if clause present, the number of threads is 6975 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 6976 if (D.hasClausesOfKind<OMPIfClause>()) { 6977 const OMPIfClause *IfClause = nullptr; 6978 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { 6979 if (C->getNameModifier() == OMPD_unknown || 6980 C->getNameModifier() == OMPD_parallel) { 6981 IfClause = C; 6982 break; 6983 } 6984 } 6985 if (IfClause) { 6986 const Expr *Cond = IfClause->getCondition(); 6987 bool Result; 6988 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { 6989 if (!Result) 6990 return Bld.getInt32(1); 6991 } else { 6992 CodeGenFunction::RunCleanupsScope Scope(CGF); 6993 CondVal = CGF.EvaluateExprAsBool(Cond); 6994 } 6995 } 6996 } 6997 if (D.hasClausesOfKind<OMPThreadLimitClause>()) { 6998 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); 6999 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); 7000 llvm::Value *ThreadLimit = CGF.EmitScalarExpr( 7001 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); 7002 ThreadLimitVal = 7003 Bld.CreateIntCast(ThreadLimit, 
CGF.Int32Ty, /*isSigned=*/false); 7004 } 7005 if (D.hasClausesOfKind<OMPNumThreadsClause>()) { 7006 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); 7007 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); 7008 llvm::Value *NumThreads = CGF.EmitScalarExpr( 7009 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); 7010 NumThreadsVal = 7011 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); 7012 ThreadLimitVal = ThreadLimitVal 7013 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, 7014 ThreadLimitVal), 7015 NumThreadsVal, ThreadLimitVal) 7016 : NumThreadsVal; 7017 } 7018 if (!ThreadLimitVal) 7019 ThreadLimitVal = Bld.getInt32(0); 7020 if (CondVal) 7021 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); 7022 return ThreadLimitVal; 7023 } 7024 case OMPD_target_teams_distribute_simd: 7025 case OMPD_target_simd: 7026 return Bld.getInt32(1); 7027 case OMPD_parallel: 7028 case OMPD_for: 7029 case OMPD_parallel_for: 7030 case OMPD_parallel_sections: 7031 case OMPD_for_simd: 7032 case OMPD_parallel_for_simd: 7033 case OMPD_cancel: 7034 case OMPD_cancellation_point: 7035 case OMPD_ordered: 7036 case OMPD_threadprivate: 7037 case OMPD_allocate: 7038 case OMPD_task: 7039 case OMPD_simd: 7040 case OMPD_sections: 7041 case OMPD_section: 7042 case OMPD_single: 7043 case OMPD_master: 7044 case OMPD_critical: 7045 case OMPD_taskyield: 7046 case OMPD_barrier: 7047 case OMPD_taskwait: 7048 case OMPD_taskgroup: 7049 case OMPD_atomic: 7050 case OMPD_flush: 7051 case OMPD_teams: 7052 case OMPD_target_data: 7053 case OMPD_target_exit_data: 7054 case OMPD_target_enter_data: 7055 case OMPD_distribute: 7056 case OMPD_distribute_simd: 7057 case OMPD_distribute_parallel_for: 7058 case OMPD_distribute_parallel_for_simd: 7059 case OMPD_teams_distribute: 7060 case OMPD_teams_distribute_simd: 7061 case OMPD_teams_distribute_parallel_for: 7062 case OMPD_teams_distribute_parallel_for_simd: 7063 case OMPD_target_update: 7064 case 
OMPD_declare_simd: 7065 case OMPD_declare_target: 7066 case OMPD_end_declare_target: 7067 case OMPD_declare_reduction: 7068 case OMPD_declare_mapper: 7069 case OMPD_taskloop: 7070 case OMPD_taskloop_simd: 7071 case OMPD_requires: 7072 case OMPD_unknown: 7073 break; 7074 } 7075 llvm_unreachable("Unsupported directive kind."); 7076 } 7077 7078 namespace { 7079 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 7080 7081 // Utility to handle information from clauses associated with a given 7082 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause). 7083 // It provides a convenient interface to obtain the information and generate 7084 // code for that information. 7085 class MappableExprsHandler { 7086 public: 7087 /// Values for bit flags used to specify the mapping type for 7088 /// offloading. 7089 enum OpenMPOffloadMappingFlags : uint64_t { 7090 /// No flags 7091 OMP_MAP_NONE = 0x0, 7092 /// Allocate memory on the device and move data from host to device. 7093 OMP_MAP_TO = 0x01, 7094 /// Allocate memory on the device and move data from device to host. 7095 OMP_MAP_FROM = 0x02, 7096 /// Always perform the requested mapping action on the element, even 7097 /// if it was already mapped before. 7098 OMP_MAP_ALWAYS = 0x04, 7099 /// Delete the element from the device environment, ignoring the 7100 /// current reference count associated with the element. 7101 OMP_MAP_DELETE = 0x08, 7102 /// The element being mapped is a pointer-pointee pair; both the 7103 /// pointer and the pointee should be mapped. 7104 OMP_MAP_PTR_AND_OBJ = 0x10, 7105 /// This flags signals that the base address of an entry should be 7106 /// passed to the target kernel as an argument. 7107 OMP_MAP_TARGET_PARAM = 0x20, 7108 /// Signal that the runtime library has to return the device pointer 7109 /// in the current position for the data being mapped. Used when we have the 7110 /// use_device_ptr clause. 
7111 OMP_MAP_RETURN_PARAM = 0x40, 7112 /// This flag signals that the reference being passed is a pointer to 7113 /// private data. 7114 OMP_MAP_PRIVATE = 0x80, 7115 /// Pass the element to the device by value. 7116 OMP_MAP_LITERAL = 0x100, 7117 /// Implicit map 7118 OMP_MAP_IMPLICIT = 0x200, 7119 /// The 16 MSBs of the flags indicate whether the entry is member of some 7120 /// struct/class. 7121 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7122 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7123 }; 7124 7125 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7126 static unsigned getFlagMemberOffset() { 7127 unsigned Offset = 0; 7128 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7129 Remain = Remain >> 1) 7130 Offset++; 7131 return Offset; 7132 } 7133 7134 /// Class that associates information with a base pointer to be passed to the 7135 /// runtime library. 7136 class BasePointerInfo { 7137 /// The base pointer. 7138 llvm::Value *Ptr = nullptr; 7139 /// The base declaration that refers to this device pointer, or null if 7140 /// there is none. 7141 const ValueDecl *DevPtrDecl = nullptr; 7142 7143 public: 7144 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7145 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7146 llvm::Value *operator*() const { return Ptr; } 7147 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7148 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7149 }; 7150 7151 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7152 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7153 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7154 7155 /// Map between a struct and the its lowest & highest elements which have been 7156 /// mapped. 
7157 /// [ValueDecl *] --> {LE(FieldIndex, Pointer), 7158 /// HE(FieldIndex, Pointer)} 7159 struct StructRangeInfoTy { 7160 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 7161 0, Address::invalid()}; 7162 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 7163 0, Address::invalid()}; 7164 Address Base = Address::invalid(); 7165 }; 7166 7167 private: 7168 /// Kind that defines how a device pointer has to be returned. 7169 struct MapInfo { 7170 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 7171 OpenMPMapClauseKind MapType = OMPC_MAP_unknown; 7172 ArrayRef<OpenMPMapModifierKind> MapModifiers; 7173 bool ReturnDevicePointer = false; 7174 bool IsImplicit = false; 7175 7176 MapInfo() = default; 7177 MapInfo( 7178 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7179 OpenMPMapClauseKind MapType, 7180 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7181 bool ReturnDevicePointer, bool IsImplicit) 7182 : Components(Components), MapType(MapType), MapModifiers(MapModifiers), 7183 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {} 7184 }; 7185 7186 /// If use_device_ptr is used on a pointer which is a struct member and there 7187 /// is no map information about it, then emission of that entry is deferred 7188 /// until the whole struct has been processed. 7189 struct DeferredDevicePtrEntryTy { 7190 const Expr *IE = nullptr; 7191 const ValueDecl *VD = nullptr; 7192 7193 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD) 7194 : IE(IE), VD(VD) {} 7195 }; 7196 7197 /// The target directive from where the mappable clauses were extracted. It 7198 /// is either a executable directive or a user-defined mapper directive. 7199 llvm::PointerUnion<const OMPExecutableDirective *, 7200 const OMPDeclareMapperDecl *> 7201 CurDir; 7202 7203 /// Function the directive is being generated for. 
7204 CodeGenFunction &CGF; 7205 7206 /// Set of all first private variables in the current directive. 7207 /// bool data is set to true if the variable is implicitly marked as 7208 /// firstprivate, false otherwise. 7209 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7210 7211 /// Map between device pointer declarations and their expression components. 7212 /// The key value for declarations in 'this' is null. 7213 llvm::DenseMap< 7214 const ValueDecl *, 7215 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7216 DevPointersMap; 7217 7218 llvm::Value *getExprTypeSize(const Expr *E) const { 7219 QualType ExprTy = E->getType().getCanonicalType(); 7220 7221 // Reference types are ignored for mapping purposes. 7222 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7223 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7224 7225 // Given that an array section is considered a built-in type, we need to 7226 // do the calculation based on the length of the section instead of relying 7227 // on CGF.getTypeSize(E->getType()). 7228 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7229 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7230 OAE->getBase()->IgnoreParenImpCasts()) 7231 .getCanonicalType(); 7232 7233 // If there is no length associated with the expression, that means we 7234 // are using the whole length of the base. 
7235 if (!OAE->getLength() && OAE->getColonLoc().isValid()) 7236 return CGF.getTypeSize(BaseTy); 7237 7238 llvm::Value *ElemSize; 7239 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7240 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7241 } else { 7242 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7243 assert(ATy && "Expecting array type if not a pointer type."); 7244 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7245 } 7246 7247 // If we don't have a length at this point, that is because we have an 7248 // array section with a single element. 7249 if (!OAE->getLength()) 7250 return ElemSize; 7251 7252 llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength()); 7253 LengthVal = 7254 CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false); 7255 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7256 } 7257 return CGF.getTypeSize(ExprTy); 7258 } 7259 7260 /// Return the corresponding bits for a given map clause modifier. Add 7261 /// a flag marking the map as a pointer if requested. Add a flag marking the 7262 /// map as the first one of a series of maps that relate to the same map 7263 /// expression. 7264 OpenMPOffloadMappingFlags getMapTypeBits( 7265 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7266 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7267 OpenMPOffloadMappingFlags Bits = 7268 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7269 switch (MapType) { 7270 case OMPC_MAP_alloc: 7271 case OMPC_MAP_release: 7272 // alloc and release is the default behavior in the runtime library, i.e. 7273 // if we don't pass any bits alloc/release that is what the runtime is 7274 // going to do. Therefore, we don't need to signal anything for these two 7275 // type modifiers. 
7276 break; 7277 case OMPC_MAP_to: 7278 Bits |= OMP_MAP_TO; 7279 break; 7280 case OMPC_MAP_from: 7281 Bits |= OMP_MAP_FROM; 7282 break; 7283 case OMPC_MAP_tofrom: 7284 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7285 break; 7286 case OMPC_MAP_delete: 7287 Bits |= OMP_MAP_DELETE; 7288 break; 7289 case OMPC_MAP_unknown: 7290 llvm_unreachable("Unexpected map type!"); 7291 } 7292 if (AddPtrFlag) 7293 Bits |= OMP_MAP_PTR_AND_OBJ; 7294 if (AddIsTargetParamFlag) 7295 Bits |= OMP_MAP_TARGET_PARAM; 7296 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7297 != MapModifiers.end()) 7298 Bits |= OMP_MAP_ALWAYS; 7299 return Bits; 7300 } 7301 7302 /// Return true if the provided expression is a final array section. A 7303 /// final array section, is one whose length can't be proved to be one. 7304 bool isFinalArraySectionExpression(const Expr *E) const { 7305 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7306 7307 // It is not an array section and therefore not a unity-size one. 7308 if (!OASE) 7309 return false; 7310 7311 // An array section with no colon always refer to a single element. 7312 if (OASE->getColonLoc().isInvalid()) 7313 return false; 7314 7315 const Expr *Length = OASE->getLength(); 7316 7317 // If we don't have a length we have to check if the array has size 1 7318 // for this dimension. Also, we should always expect a length if the 7319 // base type is pointer. 7320 if (!Length) { 7321 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7322 OASE->getBase()->IgnoreParenImpCasts()) 7323 .getCanonicalType(); 7324 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7325 return ATy->getSize().getSExtValue() != 1; 7326 // If we don't have a constant dimension length, we have to consider 7327 // the current section as having any size, so it is not necessarily 7328 // unitary. If it happen to be unity size, that's user fault. 7329 return true; 7330 } 7331 7332 // Check if the length evaluates to 1. 
7333 Expr::EvalResult Result; 7334 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7335 return true; // Can have more that size 1. 7336 7337 llvm::APSInt ConstLength = Result.Val.getInt(); 7338 return ConstLength.getSExtValue() != 1; 7339 } 7340 7341 /// Generate the base pointers, section pointers, sizes and map type 7342 /// bits for the provided map type, map modifier, and expression components. 7343 /// \a IsFirstComponent should be set to true if the provided set of 7344 /// components is the first associated with a capture. 7345 void generateInfoForComponentList( 7346 OpenMPMapClauseKind MapType, 7347 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7348 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7349 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7350 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7351 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7352 bool IsImplicit, 7353 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7354 OverlappedElements = llvm::None) const { 7355 // The following summarizes what has to be generated for each map and the 7356 // types below. The generated information is expressed in this order: 7357 // base pointer, section pointer, size, flags 7358 // (to add to the ones that come from the map type and modifier). 
7359 // 7360 // double d; 7361 // int i[100]; 7362 // float *p; 7363 // 7364 // struct S1 { 7365 // int i; 7366 // float f[50]; 7367 // } 7368 // struct S2 { 7369 // int i; 7370 // float f[50]; 7371 // S1 s; 7372 // double *p; 7373 // struct S2 *ps; 7374 // } 7375 // S2 s; 7376 // S2 *ps; 7377 // 7378 // map(d) 7379 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7380 // 7381 // map(i) 7382 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7383 // 7384 // map(i[1:23]) 7385 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7386 // 7387 // map(p) 7388 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7389 // 7390 // map(p[1:24]) 7391 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7392 // 7393 // map(s) 7394 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7395 // 7396 // map(s.i) 7397 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7398 // 7399 // map(s.s.f) 7400 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7401 // 7402 // map(s.p) 7403 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7404 // 7405 // map(to: s.p[:22]) 7406 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7407 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**) 7408 // &(s.p), &(s.p[0]), 22*sizeof(double), 7409 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7410 // (*) alloc space for struct members, only this is a target parameter 7411 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7412 // optimizes this entry out, same in the examples below) 7413 // (***) map the pointee (map: to) 7414 // 7415 // map(s.ps) 7416 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7417 // 7418 // map(from: s.ps->s.i) 7419 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7420 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7421 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7422 // 7423 // map(to: s.ps->ps) 7424 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7425 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7426 // &(s.ps), 
&(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7427 // 7428 // map(s.ps->ps->ps) 7429 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7430 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7431 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7432 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7433 // 7434 // map(to: s.ps->ps->s.f[:22]) 7435 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7436 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7437 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7438 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7439 // 7440 // map(ps) 7441 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7442 // 7443 // map(ps->i) 7444 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7445 // 7446 // map(ps->s.f) 7447 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7448 // 7449 // map(from: ps->p) 7450 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7451 // 7452 // map(to: ps->p[:22]) 7453 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7454 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7455 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO 7456 // 7457 // map(ps->ps) 7458 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7459 // 7460 // map(from: ps->ps->s.i) 7461 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7462 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7463 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7464 // 7465 // map(from: ps->ps->ps) 7466 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7467 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7468 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7469 // 7470 // map(ps->ps->ps->ps) 7471 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7472 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7473 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7474 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | 
TO | FROM 7475 // 7476 // map(to: ps->ps->ps->s.f[:22]) 7477 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7478 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7479 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7480 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7481 // 7482 // map(to: s.f[:22]) map(from: s.p[:33]) 7483 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7484 // sizeof(double*) (**), TARGET_PARAM 7485 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7486 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7487 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7488 // (*) allocate contiguous space needed to fit all mapped members even if 7489 // we allocate space for members not mapped (in this example, 7490 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7491 // them as well because they fall between &s.f[0] and &s.p) 7492 // 7493 // map(from: s.f[:22]) map(to: ps->p[:33]) 7494 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7495 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7496 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7497 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7498 // (*) the struct this entry pertains to is the 2nd element in the list of 7499 // arguments, hence MEMBER_OF(2) 7500 // 7501 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7502 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7503 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7504 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7505 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7506 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7507 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7508 // (*) the struct this entry pertains to is the 4th element in the list 7509 // of arguments, hence MEMBER_OF(4) 7510 7511 // Track if the map information being generated is the first for a 
capture. 7512 bool IsCaptureFirstInfo = IsFirstComponentList; 7513 // When the variable is on a declare target link or in a to clause with 7514 // unified memory, a reference is needed to hold the host/device address 7515 // of the variable. 7516 bool RequiresReference = false; 7517 7518 // Scan the components from the base to the complete expression. 7519 auto CI = Components.rbegin(); 7520 auto CE = Components.rend(); 7521 auto I = CI; 7522 7523 // Track if the map information being generated is the first for a list of 7524 // components. 7525 bool IsExpressionFirstInfo = true; 7526 Address BP = Address::invalid(); 7527 const Expr *AssocExpr = I->getAssociatedExpression(); 7528 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7529 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7530 7531 if (isa<MemberExpr>(AssocExpr)) { 7532 // The base is the 'this' pointer. The content of the pointer is going 7533 // to be the base of the field being mapped. 7534 BP = CGF.LoadCXXThisAddress(); 7535 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7536 (OASE && 7537 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7538 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7539 } else { 7540 // The base is the reference to the variable. 7541 // BP = &Var. 7542 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7543 if (const auto *VD = 7544 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7545 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7546 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7547 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7548 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7549 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7550 RequiresReference = true; 7551 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7552 } 7553 } 7554 } 7555 7556 // If the variable is a pointer and is being dereferenced (i.e. 
is not 7557 // the last component), the base has to be the pointer itself, not its 7558 // reference. References are ignored for mapping purposes. 7559 QualType Ty = 7560 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7561 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7562 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7563 7564 // We do not need to generate individual map information for the 7565 // pointer, it can be associated with the combined storage. 7566 ++I; 7567 } 7568 } 7569 7570 // Track whether a component of the list should be marked as MEMBER_OF some 7571 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7572 // in a component list should be marked as MEMBER_OF, all subsequent entries 7573 // do not belong to the base struct. E.g. 7574 // struct S2 s; 7575 // s.ps->ps->ps->f[:] 7576 // (1) (2) (3) (4) 7577 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7578 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7579 // is the pointee of ps(2) which is not member of struct s, so it should not 7580 // be marked as such (it is still PTR_AND_OBJ). 7581 // The variable is initialized to false so that PTR_AND_OBJ entries which 7582 // are not struct members are not considered (e.g. array of pointers to 7583 // data). 7584 bool ShouldBeMemberOf = false; 7585 7586 // Variable keeping track of whether or not we have encountered a component 7587 // in the component list which is a member expression. Useful when we have a 7588 // pointer or a final array section, in which case it is the previous 7589 // component in the list which tells us whether we have a member expression. 7590 // E.g. X.f[:] 7591 // While processing the final array section "[:]" it is "f" which tells us 7592 // whether we are dealing with a member of a declared struct. 
7593 const MemberExpr *EncounteredME = nullptr; 7594 7595 for (; I != CE; ++I) { 7596 // If the current component is member of a struct (parent struct) mark it. 7597 if (!EncounteredME) { 7598 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7599 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7600 // as MEMBER_OF the parent struct. 7601 if (EncounteredME) 7602 ShouldBeMemberOf = true; 7603 } 7604 7605 auto Next = std::next(I); 7606 7607 // We need to generate the addresses and sizes if this is the last 7608 // component, if the component is a pointer or if it is an array section 7609 // whose length can't be proved to be one. If this is a pointer, it 7610 // becomes the base address for the following components. 7611 7612 // A final array section, is one whose length can't be proved to be one. 7613 bool IsFinalArraySection = 7614 isFinalArraySectionExpression(I->getAssociatedExpression()); 7615 7616 // Get information on whether the element is a pointer. Have to do a 7617 // special treatment for array sections given that they are built-in 7618 // types. 7619 const auto *OASE = 7620 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7621 bool IsPointer = 7622 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7623 .getCanonicalType() 7624 ->isAnyPointerType()) || 7625 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7626 7627 if (Next == CE || IsPointer || IsFinalArraySection) { 7628 // If this is not the last component, we expect the pointer to be 7629 // associated with an array expression or member expression. 
7630 assert((Next == CE || 7631 isa<MemberExpr>(Next->getAssociatedExpression()) || 7632 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7633 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7634 "Unexpected expression"); 7635 7636 Address LB = 7637 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 7638 7639 // If this component is a pointer inside the base struct then we don't 7640 // need to create any entry for it - it will be combined with the object 7641 // it is pointing to into a single PTR_AND_OBJ entry. 7642 bool IsMemberPointer = 7643 IsPointer && EncounteredME && 7644 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7645 EncounteredME); 7646 if (!OverlappedElements.empty()) { 7647 // Handle base element with the info for overlapped elements. 7648 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7649 assert(Next == CE && 7650 "Expected last element for the overlapped elements."); 7651 assert(!IsPointer && 7652 "Unexpected base element with the pointer type."); 7653 // Mark the whole struct as the struct that requires allocation on the 7654 // device. 7655 PartialStruct.LowestElem = {0, LB}; 7656 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7657 I->getAssociatedExpression()->getType()); 7658 Address HB = CGF.Builder.CreateConstGEP( 7659 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7660 CGF.VoidPtrTy), 7661 TypeSize.getQuantity() - 1); 7662 PartialStruct.HighestElem = { 7663 std::numeric_limits<decltype( 7664 PartialStruct.HighestElem.first)>::max(), 7665 HB}; 7666 PartialStruct.Base = BP; 7667 // Emit data for non-overlapped data. 7668 OpenMPOffloadMappingFlags Flags = 7669 OMP_MAP_MEMBER_OF | 7670 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7671 /*AddPtrFlag=*/false, 7672 /*AddIsTargetParamFlag=*/false); 7673 LB = BP; 7674 llvm::Value *Size = nullptr; 7675 // Do bitcopy of all non-overlapped structure elements. 
7676 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7677 Component : OverlappedElements) { 7678 Address ComponentLB = Address::invalid(); 7679 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7680 Component) { 7681 if (MC.getAssociatedDeclaration()) { 7682 ComponentLB = 7683 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7684 .getAddress(); 7685 Size = CGF.Builder.CreatePtrDiff( 7686 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7687 CGF.EmitCastToVoidPtr(LB.getPointer())); 7688 break; 7689 } 7690 } 7691 BasePointers.push_back(BP.getPointer()); 7692 Pointers.push_back(LB.getPointer()); 7693 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7694 /*isSigned=*/true)); 7695 Types.push_back(Flags); 7696 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7697 } 7698 BasePointers.push_back(BP.getPointer()); 7699 Pointers.push_back(LB.getPointer()); 7700 Size = CGF.Builder.CreatePtrDiff( 7701 CGF.EmitCastToVoidPtr( 7702 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7703 CGF.EmitCastToVoidPtr(LB.getPointer())); 7704 Sizes.push_back( 7705 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7706 Types.push_back(Flags); 7707 break; 7708 } 7709 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7710 if (!IsMemberPointer) { 7711 BasePointers.push_back(BP.getPointer()); 7712 Pointers.push_back(LB.getPointer()); 7713 Sizes.push_back( 7714 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7715 7716 // We need to add a pointer flag for each map that comes from the 7717 // same expression except for the first one. We also need to signal 7718 // this map is the first one that relates with the current capture 7719 // (there is a set of entries for each capture). 
7720 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7721 MapType, MapModifiers, IsImplicit, 7722 !IsExpressionFirstInfo || RequiresReference, 7723 IsCaptureFirstInfo && !RequiresReference); 7724 7725 if (!IsExpressionFirstInfo) { 7726 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7727 // then we reset the TO/FROM/ALWAYS/DELETE flags. 7728 if (IsPointer) 7729 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7730 OMP_MAP_DELETE); 7731 7732 if (ShouldBeMemberOf) { 7733 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7734 // should be later updated with the correct value of MEMBER_OF. 7735 Flags |= OMP_MAP_MEMBER_OF; 7736 // From now on, all subsequent PTR_AND_OBJ entries should not be 7737 // marked as MEMBER_OF. 7738 ShouldBeMemberOf = false; 7739 } 7740 } 7741 7742 Types.push_back(Flags); 7743 } 7744 7745 // If we have encountered a member expression so far, keep track of the 7746 // mapped member. If the parent is "*this", then the value declaration 7747 // is nullptr. 7748 if (EncounteredME) { 7749 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7750 unsigned FieldIndex = FD->getFieldIndex(); 7751 7752 // Update info about the lowest and highest elements for this struct 7753 if (!PartialStruct.Base.isValid()) { 7754 PartialStruct.LowestElem = {FieldIndex, LB}; 7755 PartialStruct.HighestElem = {FieldIndex, LB}; 7756 PartialStruct.Base = BP; 7757 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7758 PartialStruct.LowestElem = {FieldIndex, LB}; 7759 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7760 PartialStruct.HighestElem = {FieldIndex, LB}; 7761 } 7762 } 7763 7764 // If we have a final array section, we are done with this expression. 7765 if (IsFinalArraySection) 7766 break; 7767 7768 // The pointer becomes the base for the next element. 
7769 if (Next != CE) 7770 BP = LB; 7771 7772 IsExpressionFirstInfo = false; 7773 IsCaptureFirstInfo = false; 7774 } 7775 } 7776 } 7777 7778 /// Return the adjusted map modifiers if the declaration a capture refers to 7779 /// appears in a first-private clause. This is expected to be used only with 7780 /// directives that start with 'target'. 7781 MappableExprsHandler::OpenMPOffloadMappingFlags 7782 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7783 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7784 7785 // A first private variable captured by reference will use only the 7786 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7787 // declaration is known as first-private in this handler. 7788 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7789 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7790 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7791 return MappableExprsHandler::OMP_MAP_ALWAYS | 7792 MappableExprsHandler::OMP_MAP_TO; 7793 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7794 return MappableExprsHandler::OMP_MAP_TO | 7795 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7796 return MappableExprsHandler::OMP_MAP_PRIVATE | 7797 MappableExprsHandler::OMP_MAP_TO; 7798 } 7799 return MappableExprsHandler::OMP_MAP_TO | 7800 MappableExprsHandler::OMP_MAP_FROM; 7801 } 7802 7803 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7804 // Rotate by getFlagMemberOffset() bits. 7805 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7806 << getFlagMemberOffset()); 7807 } 7808 7809 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7810 OpenMPOffloadMappingFlags MemberOfFlag) { 7811 // If the entry is PTR_AND_OBJ but has not been marked with the special 7812 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7813 // marked as MEMBER_OF. 
7814 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7815 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7816 return; 7817 7818 // Reset the placeholder value to prepare the flag for the assignment of the 7819 // proper MEMBER_OF value. 7820 Flags &= ~OMP_MAP_MEMBER_OF; 7821 Flags |= MemberOfFlag; 7822 } 7823 7824 void getPlainLayout(const CXXRecordDecl *RD, 7825 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7826 bool AsBase) const { 7827 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7828 7829 llvm::StructType *St = 7830 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7831 7832 unsigned NumElements = St->getNumElements(); 7833 llvm::SmallVector< 7834 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7835 RecordLayout(NumElements); 7836 7837 // Fill bases. 7838 for (const auto &I : RD->bases()) { 7839 if (I.isVirtual()) 7840 continue; 7841 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7842 // Ignore empty bases. 7843 if (Base->isEmpty() || CGF.getContext() 7844 .getASTRecordLayout(Base) 7845 .getNonVirtualSize() 7846 .isZero()) 7847 continue; 7848 7849 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7850 RecordLayout[FieldIndex] = Base; 7851 } 7852 // Fill in virtual bases. 7853 for (const auto &I : RD->vbases()) { 7854 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7855 // Ignore empty bases. 7856 if (Base->isEmpty()) 7857 continue; 7858 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7859 if (RecordLayout[FieldIndex]) 7860 continue; 7861 RecordLayout[FieldIndex] = Base; 7862 } 7863 // Fill in all the fields. 7864 assert(!RD->isUnion() && "Unexpected union."); 7865 for (const auto *Field : RD->fields()) { 7866 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7867 // will fill in later.) 
7868 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7869 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7870 RecordLayout[FieldIndex] = Field; 7871 } 7872 } 7873 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7874 &Data : RecordLayout) { 7875 if (Data.isNull()) 7876 continue; 7877 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7878 getPlainLayout(Base, Layout, /*AsBase=*/true); 7879 else 7880 Layout.push_back(Data.get<const FieldDecl *>()); 7881 } 7882 } 7883 7884 public: 7885 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7886 : CurDir(&Dir), CGF(CGF) { 7887 // Extract firstprivate clause information. 7888 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7889 for (const auto *D : C->varlists()) 7890 FirstPrivateDecls.try_emplace( 7891 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7892 // Extract device pointer clause information. 7893 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7894 for (auto L : C->component_lists()) 7895 DevPointersMap[L.first].push_back(L.second); 7896 } 7897 7898 /// Constructor for the declare mapper directive. 7899 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7900 : CurDir(&Dir), CGF(CGF) {} 7901 7902 /// Generate code for the combined entry if we have a partially mapped struct 7903 /// and take care of the mapping flags of the arguments corresponding to 7904 /// individual struct members. 
7905 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7906 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7907 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7908 const StructRangeInfoTy &PartialStruct) const { 7909 // Base is the base of the struct 7910 BasePointers.push_back(PartialStruct.Base.getPointer()); 7911 // Pointer is the address of the lowest element 7912 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7913 Pointers.push_back(LB); 7914 // Size is (addr of {highest+1} element) - (addr of lowest element) 7915 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7916 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7917 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7918 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7919 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7920 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7921 /*isSigned=*/false); 7922 Sizes.push_back(Size); 7923 // Map type is always TARGET_PARAM 7924 Types.push_back(OMP_MAP_TARGET_PARAM); 7925 // Remove TARGET_PARAM flag from the first element 7926 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7927 7928 // All other current entries will be MEMBER_OF the combined entry 7929 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7930 // 0xFFFF in the MEMBER_OF field). 7931 OpenMPOffloadMappingFlags MemberOfFlag = 7932 getMemberOfFlag(BasePointers.size() - 1); 7933 for (auto &M : CurTypes) 7934 setCorrectMemberOfFlag(M, MemberOfFlag); 7935 } 7936 7937 /// Generate all the base pointers, section pointers, sizes and map 7938 /// types for the extracted mappable expressions. Also, for each item that 7939 /// relates with a device pointer, a pair of the relevant declaration and 7940 /// index where it occurs is appended to the device pointers info array. 
7941 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7942 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7943 MapFlagsArrayTy &Types) const { 7944 // We have to process the component lists that relate with the same 7945 // declaration in a single chunk so that we can generate the map flags 7946 // correctly. Therefore, we organize all lists in a map. 7947 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7948 7949 // Helper function to fill the information map for the different supported 7950 // clauses. 7951 auto &&InfoGen = [&Info]( 7952 const ValueDecl *D, 7953 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7954 OpenMPMapClauseKind MapType, 7955 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7956 bool ReturnDevicePointer, bool IsImplicit) { 7957 const ValueDecl *VD = 7958 D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7959 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7960 IsImplicit); 7961 }; 7962 7963 assert(CurDir.is<const OMPExecutableDirective *>() && 7964 "Expect a executable directive"); 7965 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 7966 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 7967 for (const auto &L : C->component_lists()) { 7968 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7969 /*ReturnDevicePointer=*/false, C->isImplicit()); 7970 } 7971 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 7972 for (const auto &L : C->component_lists()) { 7973 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7974 /*ReturnDevicePointer=*/false, C->isImplicit()); 7975 } 7976 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 7977 for (const auto &L : C->component_lists()) { 7978 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7979 /*ReturnDevicePointer=*/false, C->isImplicit()); 7980 } 7981 7982 // Look at the use_device_ptr clause information and mark the existing map 7983 // 
entries as such. If there is no map information for an entry in the 7984 // use_device_ptr list, we create one with map type 'alloc' and zero size 7985 // section. It is the user fault if that was not mapped before. If there is 7986 // no map information and the pointer is a struct member, then we defer the 7987 // emission of that entry until the whole struct has been processed. 7988 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 7989 DeferredInfo; 7990 7991 for (const auto *C : 7992 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 7993 for (const auto &L : C->component_lists()) { 7994 assert(!L.second.empty() && "Not expecting empty list of components!"); 7995 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 7996 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7997 const Expr *IE = L.second.back().getAssociatedExpression(); 7998 // If the first component is a member expression, we have to look into 7999 // 'this', which maps to null in the map of map information. Otherwise 8000 // look directly for the information. 8001 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 8002 8003 // We potentially have map information for this declaration already. 8004 // Look for the first set of components that refer to it. 8005 if (It != Info.end()) { 8006 auto CI = std::find_if( 8007 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 8008 return MI.Components.back().getAssociatedDeclaration() == VD; 8009 }); 8010 // If we found a map entry, signal that the pointer has to be returned 8011 // and move on to the next declaration. 8012 if (CI != It->second.end()) { 8013 CI->ReturnDevicePointer = true; 8014 continue; 8015 } 8016 } 8017 8018 // We didn't find any match in our map information - generate a zero 8019 // size array section - if the pointer is a struct member we defer this 8020 // action until the whole struct has been processed. 
8021 if (isa<MemberExpr>(IE)) { 8022 // Insert the pointer into Info to be processed by 8023 // generateInfoForComponentList. Because it is a member pointer 8024 // without a pointee, no entry will be generated for it, therefore 8025 // we need to generate one after the whole struct has been processed. 8026 // Nonetheless, generateInfoForComponentList must be called to take 8027 // the pointer into account for the calculation of the range of the 8028 // partial struct. 8029 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8030 /*ReturnDevicePointer=*/false, C->isImplicit()); 8031 DeferredInfo[nullptr].emplace_back(IE, VD); 8032 } else { 8033 llvm::Value *Ptr = 8034 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8035 BasePointers.emplace_back(Ptr, VD); 8036 Pointers.push_back(Ptr); 8037 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8038 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8039 } 8040 } 8041 } 8042 8043 for (const auto &M : Info) { 8044 // We need to know when we generate information for the first component 8045 // associated with a capture, because the mapping flags depend on it. 8046 bool IsFirstComponentList = true; 8047 8048 // Temporary versions of arrays 8049 MapBaseValuesArrayTy CurBasePointers; 8050 MapValuesArrayTy CurPointers; 8051 MapValuesArrayTy CurSizes; 8052 MapFlagsArrayTy CurTypes; 8053 StructRangeInfoTy PartialStruct; 8054 8055 for (const MapInfo &L : M.second) { 8056 assert(!L.Components.empty() && 8057 "Not expecting declaration with no component lists."); 8058 8059 // Remember the current base pointer index. 8060 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8061 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8062 CurBasePointers, CurPointers, CurSizes, 8063 CurTypes, PartialStruct, 8064 IsFirstComponentList, L.IsImplicit); 8065 8066 // If this entry relates with a device pointer, set the relevant 8067 // declaration and add the 'return pointer' flag. 
8068 if (L.ReturnDevicePointer) { 8069 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8070 "Unexpected number of mapped base pointers."); 8071 8072 const ValueDecl *RelevantVD = 8073 L.Components.back().getAssociatedDeclaration(); 8074 assert(RelevantVD && 8075 "No relevant declaration related with device pointer??"); 8076 8077 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8078 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8079 } 8080 IsFirstComponentList = false; 8081 } 8082 8083 // Append any pending zero-length pointers which are struct members and 8084 // used with use_device_ptr. 8085 auto CI = DeferredInfo.find(M.first); 8086 if (CI != DeferredInfo.end()) { 8087 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8088 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); 8089 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8090 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8091 CurBasePointers.emplace_back(BasePtr, L.VD); 8092 CurPointers.push_back(Ptr); 8093 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8094 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8095 // value MEMBER_OF=FFFF so that the entry is later updated with the 8096 // correct value of MEMBER_OF. 8097 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8098 OMP_MAP_MEMBER_OF); 8099 } 8100 } 8101 8102 // If there is an entry in PartialStruct it means we have a struct with 8103 // individual members mapped. Emit an extra combined entry. 8104 if (PartialStruct.Base.isValid()) 8105 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8106 PartialStruct); 8107 8108 // We need to append the results of this capture to what we already have. 
8109 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8110 Pointers.append(CurPointers.begin(), CurPointers.end()); 8111 Sizes.append(CurSizes.begin(), CurSizes.end()); 8112 Types.append(CurTypes.begin(), CurTypes.end()); 8113 } 8114 } 8115 8116 /// Generate all the base pointers, section pointers, sizes and map types for 8117 /// the extracted map clauses of user-defined mapper. 8118 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8119 MapValuesArrayTy &Pointers, 8120 MapValuesArrayTy &Sizes, 8121 MapFlagsArrayTy &Types) const { 8122 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8123 "Expect a declare mapper directive"); 8124 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8125 // We have to process the component lists that relate with the same 8126 // declaration in a single chunk so that we can generate the map flags 8127 // correctly. Therefore, we organize all lists in a map. 8128 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8129 8130 // Helper function to fill the information map for the different supported 8131 // clauses. 8132 auto &&InfoGen = [&Info]( 8133 const ValueDecl *D, 8134 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8135 OpenMPMapClauseKind MapType, 8136 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8137 bool ReturnDevicePointer, bool IsImplicit) { 8138 const ValueDecl *VD = 8139 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8140 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8141 IsImplicit); 8142 }; 8143 8144 for (const auto *C : CurMapperDir->clauselists()) { 8145 const auto *MC = cast<OMPMapClause>(C); 8146 for (const auto &L : MC->component_lists()) { 8147 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8148 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8149 } 8150 } 8151 8152 for (const auto &M : Info) { 8153 // We need to know when we generate information for the first component 8154 // associated with a capture, because the mapping flags depend on it. 8155 bool IsFirstComponentList = true; 8156 8157 // Temporary versions of arrays 8158 MapBaseValuesArrayTy CurBasePointers; 8159 MapValuesArrayTy CurPointers; 8160 MapValuesArrayTy CurSizes; 8161 MapFlagsArrayTy CurTypes; 8162 StructRangeInfoTy PartialStruct; 8163 8164 for (const MapInfo &L : M.second) { 8165 assert(!L.Components.empty() && 8166 "Not expecting declaration with no component lists."); 8167 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8168 CurBasePointers, CurPointers, CurSizes, 8169 CurTypes, PartialStruct, 8170 IsFirstComponentList, L.IsImplicit); 8171 IsFirstComponentList = false; 8172 } 8173 8174 // If there is an entry in PartialStruct it means we have a struct with 8175 // individual members mapped. Emit an extra combined entry. 8176 if (PartialStruct.Base.isValid()) 8177 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8178 PartialStruct); 8179 8180 // We need to append the results of this capture to what we already have. 8181 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8182 Pointers.append(CurPointers.begin(), CurPointers.end()); 8183 Sizes.append(CurSizes.begin(), CurSizes.end()); 8184 Types.append(CurTypes.begin(), CurTypes.end()); 8185 } 8186 } 8187 8188 /// Emit capture info for lambdas for variables captured by reference. 
8189 void generateInfoForLambdaCaptures( 8190 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8191 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8192 MapFlagsArrayTy &Types, 8193 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8194 const auto *RD = VD->getType() 8195 .getCanonicalType() 8196 .getNonReferenceType() 8197 ->getAsCXXRecordDecl(); 8198 if (!RD || !RD->isLambda()) 8199 return; 8200 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8201 LValue VDLVal = CGF.MakeAddrLValue( 8202 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8203 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8204 FieldDecl *ThisCapture = nullptr; 8205 RD->getCaptureFields(Captures, ThisCapture); 8206 if (ThisCapture) { 8207 LValue ThisLVal = 8208 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8209 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8210 LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); 8211 BasePointers.push_back(ThisLVal.getPointer()); 8212 Pointers.push_back(ThisLValVal.getPointer()); 8213 Sizes.push_back( 8214 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8215 CGF.Int64Ty, /*isSigned=*/true)); 8216 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8217 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8218 } 8219 for (const LambdaCapture &LC : RD->captures()) { 8220 if (!LC.capturesVariable()) 8221 continue; 8222 const VarDecl *VD = LC.getCapturedVar(); 8223 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8224 continue; 8225 auto It = Captures.find(VD); 8226 assert(It != Captures.end() && "Found lambda capture without field."); 8227 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8228 if (LC.getCaptureKind() == LCK_ByRef) { 8229 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8230 LambdaPointers.try_emplace(VarLVal.getPointer(), 
VDLVal.getPointer()); 8231 BasePointers.push_back(VarLVal.getPointer()); 8232 Pointers.push_back(VarLValVal.getPointer()); 8233 Sizes.push_back(CGF.Builder.CreateIntCast( 8234 CGF.getTypeSize( 8235 VD->getType().getCanonicalType().getNonReferenceType()), 8236 CGF.Int64Ty, /*isSigned=*/true)); 8237 } else { 8238 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8239 LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); 8240 BasePointers.push_back(VarLVal.getPointer()); 8241 Pointers.push_back(VarRVal.getScalarVal()); 8242 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8243 } 8244 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8245 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8246 } 8247 } 8248 8249 /// Set correct indices for lambdas captures. 8250 void adjustMemberOfForLambdaCaptures( 8251 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8252 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8253 MapFlagsArrayTy &Types) const { 8254 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8255 // Set correct member_of idx for all implicit lambda captures. 8256 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8257 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8258 continue; 8259 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8260 assert(BasePtr && "Unable to find base lambda address."); 8261 int TgtIdx = -1; 8262 for (unsigned J = I; J > 0; --J) { 8263 unsigned Idx = J - 1; 8264 if (Pointers[Idx] != BasePtr) 8265 continue; 8266 TgtIdx = Idx; 8267 break; 8268 } 8269 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8270 // All other current entries will be MEMBER_OF the combined entry 8271 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8272 // 0xFFFF in the MEMBER_OF field). 
8273 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8274 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8275 } 8276 } 8277 8278 /// Generate the base pointers, section pointers, sizes and map types 8279 /// associated to a given capture. 8280 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8281 llvm::Value *Arg, 8282 MapBaseValuesArrayTy &BasePointers, 8283 MapValuesArrayTy &Pointers, 8284 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8285 StructRangeInfoTy &PartialStruct) const { 8286 assert(!Cap->capturesVariableArrayType() && 8287 "Not expecting to generate map info for a variable array type!"); 8288 8289 // We need to know when we generating information for the first component 8290 const ValueDecl *VD = Cap->capturesThis() 8291 ? nullptr 8292 : Cap->getCapturedVar()->getCanonicalDecl(); 8293 8294 // If this declaration appears in a is_device_ptr clause we just have to 8295 // pass the pointer by value. If it is a reference to a declaration, we just 8296 // pass its value. 
8297 if (DevPointersMap.count(VD)) { 8298 BasePointers.emplace_back(Arg, VD); 8299 Pointers.push_back(Arg); 8300 Sizes.push_back( 8301 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8302 CGF.Int64Ty, /*isSigned=*/true)); 8303 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8304 return; 8305 } 8306 8307 using MapData = 8308 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8309 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8310 SmallVector<MapData, 4> DeclComponentLists; 8311 assert(CurDir.is<const OMPExecutableDirective *>() && 8312 "Expect a executable directive"); 8313 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8314 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8315 for (const auto &L : C->decl_component_lists(VD)) { 8316 assert(L.first == VD && 8317 "We got information for the wrong declaration??"); 8318 assert(!L.second.empty() && 8319 "Not expecting declaration with no component lists."); 8320 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8321 C->getMapTypeModifiers(), 8322 C->isImplicit()); 8323 } 8324 } 8325 8326 // Find overlapping elements (including the offset from the base element). 
8327 llvm::SmallDenseMap< 8328 const MapData *, 8329 llvm::SmallVector< 8330 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8331 4> 8332 OverlappedData; 8333 size_t Count = 0; 8334 for (const MapData &L : DeclComponentLists) { 8335 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8336 OpenMPMapClauseKind MapType; 8337 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8338 bool IsImplicit; 8339 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8340 ++Count; 8341 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8342 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8343 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8344 auto CI = Components.rbegin(); 8345 auto CE = Components.rend(); 8346 auto SI = Components1.rbegin(); 8347 auto SE = Components1.rend(); 8348 for (; CI != CE && SI != SE; ++CI, ++SI) { 8349 if (CI->getAssociatedExpression()->getStmtClass() != 8350 SI->getAssociatedExpression()->getStmtClass()) 8351 break; 8352 // Are we dealing with different variables/fields? 8353 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8354 break; 8355 } 8356 // Found overlapping if, at least for one component, reached the head of 8357 // the components list. 8358 if (CI == CE || SI == SE) { 8359 assert((CI != CE || SI != SE) && 8360 "Unexpected full match of the mapping components."); 8361 const MapData &BaseData = CI == CE ? L : L1; 8362 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8363 SI == SE ? Components : Components1; 8364 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8365 OverlappedElements.getSecond().push_back(SubData); 8366 } 8367 } 8368 } 8369 // Sort the overlapped elements for each item. 
8370 llvm::SmallVector<const FieldDecl *, 4> Layout; 8371 if (!OverlappedData.empty()) { 8372 if (const auto *CRD = 8373 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8374 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8375 else { 8376 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8377 Layout.append(RD->field_begin(), RD->field_end()); 8378 } 8379 } 8380 for (auto &Pair : OverlappedData) { 8381 llvm::sort( 8382 Pair.getSecond(), 8383 [&Layout]( 8384 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8385 OMPClauseMappableExprCommon::MappableExprComponentListRef 8386 Second) { 8387 auto CI = First.rbegin(); 8388 auto CE = First.rend(); 8389 auto SI = Second.rbegin(); 8390 auto SE = Second.rend(); 8391 for (; CI != CE && SI != SE; ++CI, ++SI) { 8392 if (CI->getAssociatedExpression()->getStmtClass() != 8393 SI->getAssociatedExpression()->getStmtClass()) 8394 break; 8395 // Are we dealing with different variables/fields? 8396 if (CI->getAssociatedDeclaration() != 8397 SI->getAssociatedDeclaration()) 8398 break; 8399 } 8400 8401 // Lists contain the same elements. 8402 if (CI == CE && SI == SE) 8403 return false; 8404 8405 // List with less elements is less than list with more elements. 8406 if (CI == CE || SI == SE) 8407 return CI == CE; 8408 8409 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8410 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8411 if (FD1->getParent() == FD2->getParent()) 8412 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8413 const auto It = 8414 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8415 return FD == FD1 || FD == FD2; 8416 }); 8417 return *It == FD1; 8418 }); 8419 } 8420 8421 // Associated with a capture, because the mapping flags depend on it. 8422 // Go through all of the elements with the overlapped elements. 
8423 for (const auto &Pair : OverlappedData) { 8424 const MapData &L = *Pair.getFirst(); 8425 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8426 OpenMPMapClauseKind MapType; 8427 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8428 bool IsImplicit; 8429 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8430 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8431 OverlappedComponents = Pair.getSecond(); 8432 bool IsFirstComponentList = true; 8433 generateInfoForComponentList(MapType, MapModifiers, Components, 8434 BasePointers, Pointers, Sizes, Types, 8435 PartialStruct, IsFirstComponentList, 8436 IsImplicit, OverlappedComponents); 8437 } 8438 // Go through other elements without overlapped elements. 8439 bool IsFirstComponentList = OverlappedData.empty(); 8440 for (const MapData &L : DeclComponentLists) { 8441 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8442 OpenMPMapClauseKind MapType; 8443 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8444 bool IsImplicit; 8445 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8446 auto It = OverlappedData.find(&L); 8447 if (It == OverlappedData.end()) 8448 generateInfoForComponentList(MapType, MapModifiers, Components, 8449 BasePointers, Pointers, Sizes, Types, 8450 PartialStruct, IsFirstComponentList, 8451 IsImplicit); 8452 IsFirstComponentList = false; 8453 } 8454 } 8455 8456 /// Generate the base pointers, section pointers, sizes and map types 8457 /// associated with the declare target link variables. 
8458 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8459 MapValuesArrayTy &Pointers, 8460 MapValuesArrayTy &Sizes, 8461 MapFlagsArrayTy &Types) const { 8462 assert(CurDir.is<const OMPExecutableDirective *>() && 8463 "Expect a executable directive"); 8464 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8465 // Map other list items in the map clause which are not captured variables 8466 // but "declare target link" global variables. 8467 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8468 for (const auto &L : C->component_lists()) { 8469 if (!L.first) 8470 continue; 8471 const auto *VD = dyn_cast<VarDecl>(L.first); 8472 if (!VD) 8473 continue; 8474 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8475 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8476 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8477 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8478 continue; 8479 StructRangeInfoTy PartialStruct; 8480 generateInfoForComponentList( 8481 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8482 Pointers, Sizes, Types, PartialStruct, 8483 /*IsFirstComponentList=*/true, C->isImplicit()); 8484 assert(!PartialStruct.Base.isValid() && 8485 "No partial structs for declare target link expected."); 8486 } 8487 } 8488 } 8489 8490 /// Generate the default map information for a given capture \a CI, 8491 /// record field declaration \a RI and captured value \a CV. 8492 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8493 const FieldDecl &RI, llvm::Value *CV, 8494 MapBaseValuesArrayTy &CurBasePointers, 8495 MapValuesArrayTy &CurPointers, 8496 MapValuesArrayTy &CurSizes, 8497 MapFlagsArrayTy &CurMapTypes) const { 8498 bool IsImplicit = true; 8499 // Do the default mapping. 
8500 if (CI.capturesThis()) { 8501 CurBasePointers.push_back(CV); 8502 CurPointers.push_back(CV); 8503 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8504 CurSizes.push_back( 8505 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8506 CGF.Int64Ty, /*isSigned=*/true)); 8507 // Default map type. 8508 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8509 } else if (CI.capturesVariableByCopy()) { 8510 CurBasePointers.push_back(CV); 8511 CurPointers.push_back(CV); 8512 if (!RI.getType()->isAnyPointerType()) { 8513 // We have to signal to the runtime captures passed by value that are 8514 // not pointers. 8515 CurMapTypes.push_back(OMP_MAP_LITERAL); 8516 CurSizes.push_back(CGF.Builder.CreateIntCast( 8517 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8518 } else { 8519 // Pointers are implicitly mapped with a zero size and no flags 8520 // (other than first map that is added for all implicit maps). 8521 CurMapTypes.push_back(OMP_MAP_NONE); 8522 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8523 } 8524 const VarDecl *VD = CI.getCapturedVar(); 8525 auto I = FirstPrivateDecls.find(VD); 8526 if (I != FirstPrivateDecls.end()) 8527 IsImplicit = I->getSecond(); 8528 } else { 8529 assert(CI.capturesVariable() && "Expected captured reference."); 8530 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8531 QualType ElementType = PtrTy->getPointeeType(); 8532 CurSizes.push_back(CGF.Builder.CreateIntCast( 8533 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8534 // The default map type for a scalar/complex type is 'to' because by 8535 // default the value doesn't have to be retrieved. For an aggregate 8536 // type, the default is 'tofrom'. 
8537 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8538 const VarDecl *VD = CI.getCapturedVar(); 8539 auto I = FirstPrivateDecls.find(VD); 8540 if (I != FirstPrivateDecls.end() && 8541 VD->getType().isConstant(CGF.getContext())) { 8542 llvm::Constant *Addr = 8543 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8544 // Copy the value of the original variable to the new global copy. 8545 CGF.Builder.CreateMemCpy( 8546 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), 8547 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8548 CurSizes.back(), /*IsVolatile=*/false); 8549 // Use new global variable as the base pointers. 8550 CurBasePointers.push_back(Addr); 8551 CurPointers.push_back(Addr); 8552 } else { 8553 CurBasePointers.push_back(CV); 8554 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8555 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8556 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8557 AlignmentSource::Decl)); 8558 CurPointers.push_back(PtrAddr.getPointer()); 8559 } else { 8560 CurPointers.push_back(CV); 8561 } 8562 } 8563 if (I != FirstPrivateDecls.end()) 8564 IsImplicit = I->getSecond(); 8565 } 8566 // Every default map produces a single argument which is a target parameter. 8567 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8568 8569 // Add flag stating this is an implicit map. 8570 if (IsImplicit) 8571 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8572 } 8573 }; 8574 } // anonymous namespace 8575 8576 /// Emit the arrays used to pass the captures and map information to the 8577 /// offloading runtime library. If there is no map or capture information, 8578 /// return nullptr by reference. 
8579 static void 8580 emitOffloadingArrays(CodeGenFunction &CGF, 8581 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8582 MappableExprsHandler::MapValuesArrayTy &Pointers, 8583 MappableExprsHandler::MapValuesArrayTy &Sizes, 8584 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8585 CGOpenMPRuntime::TargetDataInfo &Info) { 8586 CodeGenModule &CGM = CGF.CGM; 8587 ASTContext &Ctx = CGF.getContext(); 8588 8589 // Reset the array information. 8590 Info.clearArrayInfo(); 8591 Info.NumberOfPtrs = BasePointers.size(); 8592 8593 if (Info.NumberOfPtrs) { 8594 // Detect if we have any capture size requiring runtime evaluation of the 8595 // size so that a constant array could be eventually used. 8596 bool hasRuntimeEvaluationCaptureSize = false; 8597 for (llvm::Value *S : Sizes) 8598 if (!isa<llvm::Constant>(S)) { 8599 hasRuntimeEvaluationCaptureSize = true; 8600 break; 8601 } 8602 8603 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8604 QualType PointerArrayType = 8605 Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal, 8606 /*IndexTypeQuals=*/0); 8607 8608 Info.BasePointersArray = 8609 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8610 Info.PointersArray = 8611 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8612 8613 // If we don't have any VLA types or other types that require runtime 8614 // evaluation, we can use a constant array for the map sizes, otherwise we 8615 // need to fill up the arrays as we do for the pointers. 
8616 QualType Int64Ty = 8617 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8618 if (hasRuntimeEvaluationCaptureSize) { 8619 QualType SizeArrayType = 8620 Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal, 8621 /*IndexTypeQuals=*/0); 8622 Info.SizesArray = 8623 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8624 } else { 8625 // We expect all the sizes to be constant, so we collect them to create 8626 // a constant array. 8627 SmallVector<llvm::Constant *, 16> ConstSizes; 8628 for (llvm::Value *S : Sizes) 8629 ConstSizes.push_back(cast<llvm::Constant>(S)); 8630 8631 auto *SizesArrayInit = llvm::ConstantArray::get( 8632 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8633 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8634 auto *SizesArrayGbl = new llvm::GlobalVariable( 8635 CGM.getModule(), SizesArrayInit->getType(), 8636 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8637 SizesArrayInit, Name); 8638 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8639 Info.SizesArray = SizesArrayGbl; 8640 } 8641 8642 // The map types are always constant so we don't need to generate code to 8643 // fill arrays. Instead, we create an array constant. 
8644 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8645 llvm::copy(MapTypes, Mapping.begin()); 8646 llvm::Constant *MapTypesArrayInit = 8647 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8648 std::string MaptypesName = 8649 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8650 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8651 CGM.getModule(), MapTypesArrayInit->getType(), 8652 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8653 MapTypesArrayInit, MaptypesName); 8654 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8655 Info.MapTypesArray = MapTypesArrayGbl; 8656 8657 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8658 llvm::Value *BPVal = *BasePointers[I]; 8659 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8660 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8661 Info.BasePointersArray, 0, I); 8662 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8663 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8664 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8665 CGF.Builder.CreateStore(BPVal, BPAddr); 8666 8667 if (Info.requiresDevicePointerInfo()) 8668 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8669 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8670 8671 llvm::Value *PVal = Pointers[I]; 8672 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8673 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8674 Info.PointersArray, 0, I); 8675 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8676 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8677 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8678 CGF.Builder.CreateStore(PVal, PAddr); 8679 8680 if (hasRuntimeEvaluationCaptureSize) { 8681 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8682 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8683 Info.SizesArray, 8684 /*Idx0=*/0, 8685 /*Idx1=*/I); 8686 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8687 CGF.Builder.CreateStore( 8688 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8689 SAddr); 8690 } 8691 } 8692 } 8693 } 8694 8695 /// Emit the arguments to be passed to the runtime library based on the 8696 /// arrays of pointers, sizes and map types. 8697 static void emitOffloadingArraysArgument( 8698 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8699 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8700 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8701 CodeGenModule &CGM = CGF.CGM; 8702 if (Info.NumberOfPtrs) { 8703 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8704 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8705 Info.BasePointersArray, 8706 /*Idx0=*/0, /*Idx1=*/0); 8707 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8708 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8709 Info.PointersArray, 8710 /*Idx0=*/0, 8711 /*Idx1=*/0); 8712 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8713 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8714 /*Idx0=*/0, /*Idx1=*/0); 8715 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8716 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8717 Info.MapTypesArray, 8718 /*Idx0=*/0, 8719 /*Idx1=*/0); 8720 } else { 8721 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8722 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8723 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8724 MapTypesArrayArg = 8725 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8726 } 8727 } 8728 8729 /// Check for inner distribute directive. 
8730 static const OMPExecutableDirective * 8731 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8732 const auto *CS = D.getInnermostCapturedStmt(); 8733 const auto *Body = 8734 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8735 const Stmt *ChildStmt = 8736 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8737 8738 if (const auto *NestedDir = 8739 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8740 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8741 switch (D.getDirectiveKind()) { 8742 case OMPD_target: 8743 if (isOpenMPDistributeDirective(DKind)) 8744 return NestedDir; 8745 if (DKind == OMPD_teams) { 8746 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8747 /*IgnoreCaptured=*/true); 8748 if (!Body) 8749 return nullptr; 8750 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8751 if (const auto *NND = 8752 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8753 DKind = NND->getDirectiveKind(); 8754 if (isOpenMPDistributeDirective(DKind)) 8755 return NND; 8756 } 8757 } 8758 return nullptr; 8759 case OMPD_target_teams: 8760 if (isOpenMPDistributeDirective(DKind)) 8761 return NestedDir; 8762 return nullptr; 8763 case OMPD_target_parallel: 8764 case OMPD_target_simd: 8765 case OMPD_target_parallel_for: 8766 case OMPD_target_parallel_for_simd: 8767 return nullptr; 8768 case OMPD_target_teams_distribute: 8769 case OMPD_target_teams_distribute_simd: 8770 case OMPD_target_teams_distribute_parallel_for: 8771 case OMPD_target_teams_distribute_parallel_for_simd: 8772 case OMPD_parallel: 8773 case OMPD_for: 8774 case OMPD_parallel_for: 8775 case OMPD_parallel_sections: 8776 case OMPD_for_simd: 8777 case OMPD_parallel_for_simd: 8778 case OMPD_cancel: 8779 case OMPD_cancellation_point: 8780 case OMPD_ordered: 8781 case OMPD_threadprivate: 8782 case OMPD_allocate: 8783 case OMPD_task: 8784 case OMPD_simd: 8785 case OMPD_sections: 8786 case OMPD_section: 8787 case 
OMPD_single: 8788 case OMPD_master: 8789 case OMPD_critical: 8790 case OMPD_taskyield: 8791 case OMPD_barrier: 8792 case OMPD_taskwait: 8793 case OMPD_taskgroup: 8794 case OMPD_atomic: 8795 case OMPD_flush: 8796 case OMPD_teams: 8797 case OMPD_target_data: 8798 case OMPD_target_exit_data: 8799 case OMPD_target_enter_data: 8800 case OMPD_distribute: 8801 case OMPD_distribute_simd: 8802 case OMPD_distribute_parallel_for: 8803 case OMPD_distribute_parallel_for_simd: 8804 case OMPD_teams_distribute: 8805 case OMPD_teams_distribute_simd: 8806 case OMPD_teams_distribute_parallel_for: 8807 case OMPD_teams_distribute_parallel_for_simd: 8808 case OMPD_target_update: 8809 case OMPD_declare_simd: 8810 case OMPD_declare_target: 8811 case OMPD_end_declare_target: 8812 case OMPD_declare_reduction: 8813 case OMPD_declare_mapper: 8814 case OMPD_taskloop: 8815 case OMPD_taskloop_simd: 8816 case OMPD_requires: 8817 case OMPD_unknown: 8818 llvm_unreachable("Unexpected directive."); 8819 } 8820 } 8821 8822 return nullptr; 8823 } 8824 8825 /// Emit the user-defined mapper function. The code generation follows the 8826 /// pattern in the example below. 8827 /// \code 8828 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8829 /// void *base, void *begin, 8830 /// int64_t size, int64_t type) { 8831 /// // Allocate space for an array section first. 8832 /// if (size > 1 && !maptype.IsDelete) 8833 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8834 /// size*sizeof(Ty), clearToFrom(type)); 8835 /// // Map members. 
8836 /// for (unsigned i = 0; i < size; i++) { 8837 /// // For each component specified by this mapper: 8838 /// for (auto c : all_components) { 8839 /// if (c.hasMapper()) 8840 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8841 /// c.arg_type); 8842 /// else 8843 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8844 /// c.arg_begin, c.arg_size, c.arg_type); 8845 /// } 8846 /// } 8847 /// // Delete the array section. 8848 /// if (size > 1 && maptype.IsDelete) 8849 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8850 /// size*sizeof(Ty), clearToFrom(type)); 8851 /// } 8852 /// \endcode 8853 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8854 CodeGenFunction *CGF) { 8855 if (UDMMap.count(D) > 0) 8856 return; 8857 ASTContext &C = CGM.getContext(); 8858 QualType Ty = D->getType(); 8859 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8860 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8861 auto *MapperVarDecl = 8862 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8863 SourceLocation Loc = D->getLocation(); 8864 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8865 8866 // Prepare mapper function arguments and attributes. 
8867 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8868 C.VoidPtrTy, ImplicitParamDecl::Other); 8869 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 8870 ImplicitParamDecl::Other); 8871 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8872 C.VoidPtrTy, ImplicitParamDecl::Other); 8873 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8874 ImplicitParamDecl::Other); 8875 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8876 ImplicitParamDecl::Other); 8877 FunctionArgList Args; 8878 Args.push_back(&HandleArg); 8879 Args.push_back(&BaseArg); 8880 Args.push_back(&BeginArg); 8881 Args.push_back(&SizeArg); 8882 Args.push_back(&TypeArg); 8883 const CGFunctionInfo &FnInfo = 8884 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 8885 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 8886 SmallString<64> TyStr; 8887 llvm::raw_svector_ostream Out(TyStr); 8888 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 8889 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 8890 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 8891 Name, &CGM.getModule()); 8892 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 8893 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 8894 // Start the mapper function code generation. 8895 CodeGenFunction MapperCGF(CGM); 8896 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 8897 // Compute the starting and end addreses of array elements. 
8898 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 8899 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 8900 C.getPointerType(Int64Ty), Loc); 8901 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 8902 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 8903 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 8904 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 8905 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 8906 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 8907 C.getPointerType(Int64Ty), Loc); 8908 // Prepare common arguments for array initiation and deletion. 8909 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 8910 MapperCGF.GetAddrOfLocalVar(&HandleArg), 8911 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8912 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 8913 MapperCGF.GetAddrOfLocalVar(&BaseArg), 8914 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8915 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 8916 MapperCGF.GetAddrOfLocalVar(&BeginArg), 8917 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8918 8919 // Emit array initiation if this is an array section and \p MapType indicates 8920 // that memory allocation is required. 8921 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 8922 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 8923 ElementSize, HeadBB, /*IsInit=*/true); 8924 8925 // Emit a for loop to iterate through SizeArg of elements and map all of them. 8926 8927 // Emit the loop header block. 8928 MapperCGF.EmitBlock(HeadBB); 8929 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 8930 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 8931 // Evaluate whether the initial condition is satisfied. 
8932 llvm::Value *IsEmpty = 8933 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 8934 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 8935 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 8936 8937 // Emit the loop body block. 8938 MapperCGF.EmitBlock(BodyBB); 8939 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 8940 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 8941 PtrPHI->addIncoming(PtrBegin, EntryBB); 8942 Address PtrCurrent = 8943 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 8944 .getAlignment() 8945 .alignmentOfArrayElement(ElementSize)); 8946 // Privatize the declared variable of mapper to be the current array element. 8947 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 8948 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 8949 return MapperCGF 8950 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 8951 .getAddress(); 8952 }); 8953 (void)Scope.Privatize(); 8954 8955 // Get map clause information. Fill up the arrays with all mapped variables. 8956 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8957 MappableExprsHandler::MapValuesArrayTy Pointers; 8958 MappableExprsHandler::MapValuesArrayTy Sizes; 8959 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8960 MappableExprsHandler MEHandler(*D, MapperCGF); 8961 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 8962 8963 // Call the runtime API __tgt_mapper_num_components to get the number of 8964 // pre-existing components. 8965 llvm::Value *OffloadingArgs[] = {Handle}; 8966 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 8967 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 8968 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 8969 PreviousSize, 8970 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 8971 8972 // Fill up the runtime mapper handle for all components. 
8973 for (unsigned I = 0; I < BasePointers.size(); ++I) { 8974 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 8975 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8976 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 8977 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8978 llvm::Value *CurSizeArg = Sizes[I]; 8979 8980 // Extract the MEMBER_OF field from the map type. 8981 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 8982 MapperCGF.EmitBlock(MemberBB); 8983 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 8984 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 8985 OriMapType, 8986 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 8987 llvm::BasicBlock *MemberCombineBB = 8988 MapperCGF.createBasicBlock("omp.member.combine"); 8989 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 8990 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 8991 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 8992 // Add the number of pre-existing components to the MEMBER_OF field if it 8993 // is valid. 8994 MapperCGF.EmitBlock(MemberCombineBB); 8995 llvm::Value *CombinedMember = 8996 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 8997 // Do nothing if it is not a member of previous components. 8998 MapperCGF.EmitBlock(TypeBB); 8999 llvm::PHINode *MemberMapType = 9000 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 9001 MemberMapType->addIncoming(OriMapType, MemberBB); 9002 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 9003 9004 // Combine the map type inherited from user-defined mapper with that 9005 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 9006 // bits of the \a MapType, which is the input argument of the mapper 9007 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 9008 // bits of MemberMapType. 
9009 // [OpenMP 5.0], 1.2.6. map-type decay. 9010 // | alloc | to | from | tofrom | release | delete 9011 // ---------------------------------------------------------- 9012 // alloc | alloc | alloc | alloc | alloc | release | delete 9013 // to | alloc | to | alloc | to | release | delete 9014 // from | alloc | alloc | from | from | release | delete 9015 // tofrom | alloc | to | from | tofrom | release | delete 9016 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 9017 MapType, 9018 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 9019 MappableExprsHandler::OMP_MAP_FROM)); 9020 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 9021 llvm::BasicBlock *AllocElseBB = 9022 MapperCGF.createBasicBlock("omp.type.alloc.else"); 9023 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 9024 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9025 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9026 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9027 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9028 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9029 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9030 MapperCGF.EmitBlock(AllocBB); 9031 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9032 MemberMapType, 9033 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9034 MappableExprsHandler::OMP_MAP_FROM))); 9035 MapperCGF.Builder.CreateBr(EndBB); 9036 MapperCGF.EmitBlock(AllocElseBB); 9037 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9038 LeftToFrom, 9039 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9040 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9041 // In case of to, clear OMP_MAP_FROM. 
9042 MapperCGF.EmitBlock(ToBB); 9043 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9044 MemberMapType, 9045 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9046 MapperCGF.Builder.CreateBr(EndBB); 9047 MapperCGF.EmitBlock(ToElseBB); 9048 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9049 LeftToFrom, 9050 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9051 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9052 // In case of from, clear OMP_MAP_TO. 9053 MapperCGF.EmitBlock(FromBB); 9054 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9055 MemberMapType, 9056 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9057 // In case of tofrom, do nothing. 9058 MapperCGF.EmitBlock(EndBB); 9059 llvm::PHINode *CurMapType = 9060 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9061 CurMapType->addIncoming(AllocMapType, AllocBB); 9062 CurMapType->addIncoming(ToMapType, ToBB); 9063 CurMapType->addIncoming(FromMapType, FromBB); 9064 CurMapType->addIncoming(MemberMapType, ToElseBB); 9065 9066 // TODO: call the corresponding mapper function if a user-defined mapper is 9067 // associated with this map clause. 9068 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9069 // data structure. 9070 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9071 CurSizeArg, CurMapType}; 9072 MapperCGF.EmitRuntimeCall( 9073 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9074 OffloadingArgs); 9075 } 9076 9077 // Update the pointer to point to the next element that needs to be mapped, 9078 // and check whether we have mapped all elements. 
9079 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9080 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9081 PtrPHI->addIncoming(PtrNext, BodyBB); 9082 llvm::Value *IsDone = 9083 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9084 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9085 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9086 9087 MapperCGF.EmitBlock(ExitBB); 9088 // Emit array deletion if this is an array section and \p MapType indicates 9089 // that deletion is required. 9090 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9091 ElementSize, DoneBB, /*IsInit=*/false); 9092 9093 // Emit the function exit block. 9094 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9095 MapperCGF.FinishFunction(); 9096 UDMMap.try_emplace(D, Fn); 9097 if (CGF) { 9098 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9099 Decls.second.push_back(D); 9100 } 9101 } 9102 9103 /// Emit the array initialization or deletion portion for user-defined mapper 9104 /// code generation. First, it evaluates whether an array section is mapped and 9105 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9106 /// true, and \a MapType indicates to not delete this array, array 9107 /// initialization code is generated. If \a IsInit is false, and \a MapType 9108 /// indicates to not this array, array deletion code is generated. 9109 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9110 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9111 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9112 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9113 StringRef Prefix = IsInit ? ".init" : ".del"; 9114 9115 // Evaluate if this is an array section. 
9116 llvm::BasicBlock *IsDeleteBB = 9117 MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); 9118 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); 9119 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9120 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9121 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9122 9123 // Evaluate if we are going to delete this section. 9124 MapperCGF.EmitBlock(IsDeleteBB); 9125 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9126 MapType, 9127 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9128 llvm::Value *DeleteCond; 9129 if (IsInit) { 9130 DeleteCond = MapperCGF.Builder.CreateIsNull( 9131 DeleteBit, "omp.array" + Prefix + ".delete"); 9132 } else { 9133 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9134 DeleteBit, "omp.array" + Prefix + ".delete"); 9135 } 9136 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9137 9138 MapperCGF.EmitBlock(BodyBB); 9139 // Get the array size by multiplying element size and element number (i.e., \p 9140 // Size). 9141 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9142 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9143 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9144 // memory allocation/deletion purpose only. 9145 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9146 MapType, 9147 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9148 MappableExprsHandler::OMP_MAP_FROM))); 9149 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9150 // data structure. 
9151 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9152 MapperCGF.EmitRuntimeCall( 9153 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9154 } 9155 9156 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9157 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, 9158 const llvm::function_ref<llvm::Value *( 9159 CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) { 9160 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9161 const OMPExecutableDirective *TD = &D; 9162 // Get nested teams distribute kind directive, if any. 9163 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9164 TD = getNestedDistributeDirective(CGM.getContext(), D); 9165 if (!TD) 9166 return; 9167 const auto *LD = cast<OMPLoopDirective>(TD); 9168 auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF, 9169 PrePostActionTy &) { 9170 llvm::Value *NumIterations = SizeEmitter(CGF, *LD); 9171 9172 // Emit device ID if any. 
9173 llvm::Value *DeviceID; 9174 if (Device) 9175 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9176 CGF.Int64Ty, /*isSigned=*/true); 9177 else 9178 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9179 9180 llvm::Value *Args[] = {DeviceID, NumIterations}; 9181 CGF.EmitRuntimeCall( 9182 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9183 }; 9184 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9185 } 9186 9187 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, 9188 const OMPExecutableDirective &D, 9189 llvm::Function *OutlinedFn, 9190 llvm::Value *OutlinedFnID, 9191 const Expr *IfCond, const Expr *Device) { 9192 if (!CGF.HaveInsertPoint()) 9193 return; 9194 9195 assert(OutlinedFn && "Invalid outlined function!"); 9196 9197 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9198 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9199 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9200 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9201 PrePostActionTy &) { 9202 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9203 }; 9204 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9205 9206 CodeGenFunction::OMPTargetDataInfo InputInfo; 9207 llvm::Value *MapTypesArray = nullptr; 9208 // Fill up the pointer arrays and transfer execution to the device. 9209 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9210 &MapTypesArray, &CS, RequiresOuterTask, 9211 &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) { 9212 // On top of the arrays that were filled up, the target offloading call 9213 // takes as arguments the device id as well as the host pointer. The host 9214 // pointer is used by the runtime library to identify the current target 9215 // region, so it only has to be unique and not necessarily point to 9216 // anything. 
It could be the pointer to the outlined function that 9217 // implements the target region, but we aren't using that so that the 9218 // compiler doesn't need to keep that, and could therefore inline the host 9219 // function if proven worthwhile during optimization. 9220 9221 // From this point on, we need to have an ID of the target region defined. 9222 assert(OutlinedFnID && "Invalid outlined function ID!"); 9223 9224 // Emit device ID if any. 9225 llvm::Value *DeviceID; 9226 if (Device) { 9227 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9228 CGF.Int64Ty, /*isSigned=*/true); 9229 } else { 9230 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9231 } 9232 9233 // Emit the number of elements in the offloading arrays. 9234 llvm::Value *PointerNum = 9235 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9236 9237 // Return value of the runtime offloading call. 9238 llvm::Value *Return; 9239 9240 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9241 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9242 9243 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9244 // The target region is an outlined function launched by the runtime 9245 // via calls __tgt_target() or __tgt_target_teams(). 9246 // 9247 // __tgt_target() launches a target region with one team and one thread, 9248 // executing a serial region. This master thread may in turn launch 9249 // more threads within its team upon encountering a parallel region, 9250 // however, no additional teams can be launched on the device. 9251 // 9252 // __tgt_target_teams() launches a target region with one or more teams, 9253 // each with one or more threads. This call is required for target 9254 // constructs such as: 9255 // 'target teams' 9256 // 'target' / 'teams' 9257 // 'target teams distribute parallel for' 9258 // 'target parallel' 9259 // and so on. 
9260 // 9261 // Note that on the host and CPU targets, the runtime implementation of 9262 // these calls simply call the outlined function without forking threads. 9263 // The outlined functions themselves have runtime calls to 9264 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9265 // the compiler in emitTeamsCall() and emitParallelCall(). 9266 // 9267 // In contrast, on the NVPTX target, the implementation of 9268 // __tgt_target_teams() launches a GPU kernel with the requested number 9269 // of teams and threads so no additional calls to the runtime are required. 9270 if (NumTeams) { 9271 // If we have NumTeams defined this means that we have an enclosed teams 9272 // region. Therefore we also expect to have NumThreads defined. These two 9273 // values should be defined in the presence of a teams directive, 9274 // regardless of having any clauses associated. If the user is using teams 9275 // but no clauses, these two values will be the default that should be 9276 // passed to the runtime library - a 32-bit integer with the value zero. 9277 assert(NumThreads && "Thread limit expression should be available along " 9278 "with number of teams."); 9279 llvm::Value *OffloadingArgs[] = {DeviceID, 9280 OutlinedFnID, 9281 PointerNum, 9282 InputInfo.BasePointersArray.getPointer(), 9283 InputInfo.PointersArray.getPointer(), 9284 InputInfo.SizesArray.getPointer(), 9285 MapTypesArray, 9286 NumTeams, 9287 NumThreads}; 9288 Return = CGF.EmitRuntimeCall( 9289 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 9290 : OMPRTL__tgt_target_teams), 9291 OffloadingArgs); 9292 } else { 9293 llvm::Value *OffloadingArgs[] = {DeviceID, 9294 OutlinedFnID, 9295 PointerNum, 9296 InputInfo.BasePointersArray.getPointer(), 9297 InputInfo.PointersArray.getPointer(), 9298 InputInfo.SizesArray.getPointer(), 9299 MapTypesArray}; 9300 Return = CGF.EmitRuntimeCall( 9301 createRuntimeFunction(HasNowait ? 
OMPRTL__tgt_target_nowait 9302 : OMPRTL__tgt_target), 9303 OffloadingArgs); 9304 } 9305 9306 // Check the error code and execute the host version if required. 9307 llvm::BasicBlock *OffloadFailedBlock = 9308 CGF.createBasicBlock("omp_offload.failed"); 9309 llvm::BasicBlock *OffloadContBlock = 9310 CGF.createBasicBlock("omp_offload.cont"); 9311 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9312 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9313 9314 CGF.EmitBlock(OffloadFailedBlock); 9315 if (RequiresOuterTask) { 9316 CapturedVars.clear(); 9317 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9318 } 9319 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9320 CGF.EmitBranch(OffloadContBlock); 9321 9322 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9323 }; 9324 9325 // Notify that the host version must be executed. 9326 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9327 RequiresOuterTask](CodeGenFunction &CGF, 9328 PrePostActionTy &) { 9329 if (RequiresOuterTask) { 9330 CapturedVars.clear(); 9331 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9332 } 9333 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9334 }; 9335 9336 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9337 &CapturedVars, RequiresOuterTask, 9338 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9339 // Fill up the arrays with all the captured variables. 9340 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9341 MappableExprsHandler::MapValuesArrayTy Pointers; 9342 MappableExprsHandler::MapValuesArrayTy Sizes; 9343 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9344 9345 // Get mappable expression information. 
9346 MappableExprsHandler MEHandler(D, CGF); 9347 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9348 9349 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9350 auto CV = CapturedVars.begin(); 9351 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9352 CE = CS.capture_end(); 9353 CI != CE; ++CI, ++RI, ++CV) { 9354 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 9355 MappableExprsHandler::MapValuesArrayTy CurPointers; 9356 MappableExprsHandler::MapValuesArrayTy CurSizes; 9357 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 9358 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9359 9360 // VLA sizes are passed to the outlined region by copy and do not have map 9361 // information associated. 9362 if (CI->capturesVariableArrayType()) { 9363 CurBasePointers.push_back(*CV); 9364 CurPointers.push_back(*CV); 9365 CurSizes.push_back(CGF.Builder.CreateIntCast( 9366 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9367 // Copy to the device as an argument. No need to retrieve it. 9368 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9369 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9370 MappableExprsHandler::OMP_MAP_IMPLICIT); 9371 } else { 9372 // If we have any information in the map clause, we use it, otherwise we 9373 // just do a default mapping. 9374 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 9375 CurSizes, CurMapTypes, PartialStruct); 9376 if (CurBasePointers.empty()) 9377 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 9378 CurPointers, CurSizes, CurMapTypes); 9379 // Generate correct mapping for variables captured by reference in 9380 // lambdas. 9381 if (CI->capturesVariable()) 9382 MEHandler.generateInfoForLambdaCaptures( 9383 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 9384 CurMapTypes, LambdaPointers); 9385 } 9386 // We expect to have at least an element of information for this capture. 
9387 assert(!CurBasePointers.empty() && 9388 "Non-existing map pointer for capture!"); 9389 assert(CurBasePointers.size() == CurPointers.size() && 9390 CurBasePointers.size() == CurSizes.size() && 9391 CurBasePointers.size() == CurMapTypes.size() && 9392 "Inconsistent map information sizes!"); 9393 9394 // If there is an entry in PartialStruct it means we have a struct with 9395 // individual members mapped. Emit an extra combined entry. 9396 if (PartialStruct.Base.isValid()) 9397 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 9398 CurMapTypes, PartialStruct); 9399 9400 // We need to append the results of this capture to what we already have. 9401 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 9402 Pointers.append(CurPointers.begin(), CurPointers.end()); 9403 Sizes.append(CurSizes.begin(), CurSizes.end()); 9404 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 9405 } 9406 // Adjust MEMBER_OF flags for the lambdas captures. 9407 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 9408 Pointers, MapTypes); 9409 // Map other list items in the map clause which are not captured variables 9410 // but "declare target link" global variables. 9411 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 9412 MapTypes); 9413 9414 TargetDataInfo Info; 9415 // Fill up the arrays and create the arguments. 
9416 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9417 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9418 Info.PointersArray, Info.SizesArray, 9419 Info.MapTypesArray, Info); 9420 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9421 InputInfo.BasePointersArray = 9422 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9423 InputInfo.PointersArray = 9424 Address(Info.PointersArray, CGM.getPointerAlign()); 9425 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9426 MapTypesArray = Info.MapTypesArray; 9427 if (RequiresOuterTask) 9428 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9429 else 9430 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9431 }; 9432 9433 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9434 CodeGenFunction &CGF, PrePostActionTy &) { 9435 if (RequiresOuterTask) { 9436 CodeGenFunction::OMPTargetDataInfo InputInfo; 9437 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9438 } else { 9439 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9440 } 9441 }; 9442 9443 // If we have a target function ID it means that we need to support 9444 // offloading, otherwise, just execute on the host. We need to execute on host 9445 // regardless of the conditional in the if clause if, e.g., the user do not 9446 // specify target triples. 9447 if (OutlinedFnID) { 9448 if (IfCond) { 9449 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9450 } else { 9451 RegionCodeGenTy ThenRCG(TargetThenGen); 9452 ThenRCG(CGF); 9453 } 9454 } else { 9455 RegionCodeGenTy ElseRCG(TargetElseGen); 9456 ElseRCG(CGF); 9457 } 9458 } 9459 9460 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9461 StringRef ParentName) { 9462 if (!S) 9463 return; 9464 9465 // Codegen OMP target directives that offload compute to the device. 
9466 bool RequiresDeviceCodegen = 9467 isa<OMPExecutableDirective>(S) && 9468 isOpenMPTargetExecutionDirective( 9469 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9470 9471 if (RequiresDeviceCodegen) { 9472 const auto &E = *cast<OMPExecutableDirective>(S); 9473 unsigned DeviceID; 9474 unsigned FileID; 9475 unsigned Line; 9476 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9477 FileID, Line); 9478 9479 // Is this a target region that should not be emitted as an entry point? If 9480 // so just signal we are done with this target region. 9481 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9482 ParentName, Line)) 9483 return; 9484 9485 switch (E.getDirectiveKind()) { 9486 case OMPD_target: 9487 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9488 cast<OMPTargetDirective>(E)); 9489 break; 9490 case OMPD_target_parallel: 9491 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9492 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9493 break; 9494 case OMPD_target_teams: 9495 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9496 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9497 break; 9498 case OMPD_target_teams_distribute: 9499 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9500 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9501 break; 9502 case OMPD_target_teams_distribute_simd: 9503 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9504 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9505 break; 9506 case OMPD_target_parallel_for: 9507 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9508 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9509 break; 9510 case OMPD_target_parallel_for_simd: 9511 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9512 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9513 break; 9514 case OMPD_target_simd: 9515 
CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9516 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9517 break; 9518 case OMPD_target_teams_distribute_parallel_for: 9519 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9520 CGM, ParentName, 9521 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9522 break; 9523 case OMPD_target_teams_distribute_parallel_for_simd: 9524 CodeGenFunction:: 9525 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9526 CGM, ParentName, 9527 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9528 break; 9529 case OMPD_parallel: 9530 case OMPD_for: 9531 case OMPD_parallel_for: 9532 case OMPD_parallel_sections: 9533 case OMPD_for_simd: 9534 case OMPD_parallel_for_simd: 9535 case OMPD_cancel: 9536 case OMPD_cancellation_point: 9537 case OMPD_ordered: 9538 case OMPD_threadprivate: 9539 case OMPD_allocate: 9540 case OMPD_task: 9541 case OMPD_simd: 9542 case OMPD_sections: 9543 case OMPD_section: 9544 case OMPD_single: 9545 case OMPD_master: 9546 case OMPD_critical: 9547 case OMPD_taskyield: 9548 case OMPD_barrier: 9549 case OMPD_taskwait: 9550 case OMPD_taskgroup: 9551 case OMPD_atomic: 9552 case OMPD_flush: 9553 case OMPD_teams: 9554 case OMPD_target_data: 9555 case OMPD_target_exit_data: 9556 case OMPD_target_enter_data: 9557 case OMPD_distribute: 9558 case OMPD_distribute_simd: 9559 case OMPD_distribute_parallel_for: 9560 case OMPD_distribute_parallel_for_simd: 9561 case OMPD_teams_distribute: 9562 case OMPD_teams_distribute_simd: 9563 case OMPD_teams_distribute_parallel_for: 9564 case OMPD_teams_distribute_parallel_for_simd: 9565 case OMPD_target_update: 9566 case OMPD_declare_simd: 9567 case OMPD_declare_target: 9568 case OMPD_end_declare_target: 9569 case OMPD_declare_reduction: 9570 case OMPD_declare_mapper: 9571 case OMPD_taskloop: 9572 case OMPD_taskloop_simd: 9573 case OMPD_requires: 9574 case OMPD_unknown: 9575 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 
9576 } 9577 return; 9578 } 9579 9580 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9581 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9582 return; 9583 9584 scanForTargetRegionsFunctions( 9585 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9586 return; 9587 } 9588 9589 // If this is a lambda function, look into its body. 9590 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9591 S = L->getBody(); 9592 9593 // Keep looking for target regions recursively. 9594 for (const Stmt *II : S->children()) 9595 scanForTargetRegionsFunctions(II, ParentName); 9596 } 9597 9598 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9599 // If emitting code for the host, we do not process FD here. Instead we do 9600 // the normal code generation. 9601 if (!CGM.getLangOpts().OpenMPIsDevice) 9602 return false; 9603 9604 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9605 StringRef Name = CGM.getMangledName(GD); 9606 // Try to detect target regions in the function. 9607 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) 9608 scanForTargetRegionsFunctions(FD->getBody(), Name); 9609 9610 // Do not to emit function if it is not marked as declare target. 9611 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9612 AlreadyEmittedTargetFunctions.count(Name) == 0; 9613 } 9614 9615 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9616 if (!CGM.getLangOpts().OpenMPIsDevice) 9617 return false; 9618 9619 // Check if there are Ctors/Dtors in this declaration and look for target 9620 // regions in it. We use the complete variant to produce the kernel name 9621 // mangling. 
9622 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9623 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9624 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9625 StringRef ParentName = 9626 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9627 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9628 } 9629 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9630 StringRef ParentName = 9631 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9632 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9633 } 9634 } 9635 9636 // Do not to emit variable if it is not marked as declare target. 9637 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9638 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9639 cast<VarDecl>(GD.getDecl())); 9640 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9641 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9642 HasRequiresUnifiedSharedMemory)) { 9643 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9644 return true; 9645 } 9646 return false; 9647 } 9648 9649 llvm::Constant * 9650 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9651 const VarDecl *VD) { 9652 assert(VD->getType().isConstant(CGM.getContext()) && 9653 "Expected constant variable."); 9654 StringRef VarName; 9655 llvm::Constant *Addr; 9656 llvm::GlobalValue::LinkageTypes Linkage; 9657 QualType Ty = VD->getType(); 9658 SmallString<128> Buffer; 9659 { 9660 unsigned DeviceID; 9661 unsigned FileID; 9662 unsigned Line; 9663 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9664 FileID, Line); 9665 llvm::raw_svector_ostream OS(Buffer); 9666 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9667 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9668 VarName = OS.str(); 9669 } 9670 Linkage = llvm::GlobalValue::InternalLinkage; 9671 Addr = 9672 
getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9673 getDefaultFirstprivateAddressSpace()); 9674 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9675 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9676 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9677 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9678 VarName, Addr, VarSize, 9679 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9680 return Addr; 9681 } 9682 9683 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9684 llvm::Constant *Addr) { 9685 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9686 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9687 if (!Res) { 9688 if (CGM.getLangOpts().OpenMPIsDevice) { 9689 // Register non-target variables being emitted in device code (debug info 9690 // may cause this). 9691 StringRef VarName = CGM.getMangledName(VD); 9692 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9693 } 9694 return; 9695 } 9696 // Register declare target variables. 9697 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9698 StringRef VarName; 9699 CharUnits VarSize; 9700 llvm::GlobalValue::LinkageTypes Linkage; 9701 9702 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9703 !HasRequiresUnifiedSharedMemory) { 9704 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9705 VarName = CGM.getMangledName(VD); 9706 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9707 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9708 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9709 } else { 9710 VarSize = CharUnits::Zero(); 9711 } 9712 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9713 // Temp solution to prevent optimizations of the internal variables. 
9714 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9715 std::string RefName = getName({VarName, "ref"}); 9716 if (!CGM.GetGlobalValue(RefName)) { 9717 llvm::Constant *AddrRef = 9718 getOrCreateInternalVariable(Addr->getType(), RefName); 9719 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9720 GVAddrRef->setConstant(/*Val=*/true); 9721 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9722 GVAddrRef->setInitializer(Addr); 9723 CGM.addCompilerUsedGlobal(GVAddrRef); 9724 } 9725 } 9726 } else { 9727 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9728 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9729 HasRequiresUnifiedSharedMemory)) && 9730 "Declare target attribute must link or to with unified memory."); 9731 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9732 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9733 else 9734 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9735 9736 if (CGM.getLangOpts().OpenMPIsDevice) { 9737 VarName = Addr->getName(); 9738 Addr = nullptr; 9739 } else { 9740 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9741 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9742 } 9743 VarSize = CGM.getPointerSize(); 9744 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9745 } 9746 9747 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9748 VarName, Addr, VarSize, Flags, Linkage); 9749 } 9750 9751 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9752 if (isa<FunctionDecl>(GD.getDecl()) || 9753 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9754 return emitTargetFunctions(GD); 9755 9756 return emitTargetGlobalVariable(GD); 9757 } 9758 9759 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9760 for (const VarDecl *VD : DeferredGlobalVariables) { 9761 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9762 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9763 if (!Res) 9764 continue; 9765 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9766 !HasRequiresUnifiedSharedMemory) { 9767 CGM.EmitGlobal(VD); 9768 } else { 9769 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9770 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9771 HasRequiresUnifiedSharedMemory)) && 9772 "Expected link clause or to clause with unified memory."); 9773 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9774 } 9775 } 9776 } 9777 9778 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9779 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9780 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9781 " Expected target-based directive."); 9782 } 9783 9784 void CGOpenMPRuntime::checkArchForUnifiedAddressing( 9785 const OMPRequiresDecl *D) { 9786 for (const OMPClause *Clause : D->clauselists()) { 9787 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9788 HasRequiresUnifiedSharedMemory = true; 9789 break; 9790 } 9791 } 9792 } 9793 9794 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9795 LangAS &AS) { 9796 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9797 return false; 9798 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9799 switch(A->getAllocatorType()) { 9800 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9801 // Not supported, fallback to the default mem space. 
9802 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9803 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9804 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9805 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9806 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9807 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9808 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9809 AS = LangAS::Default; 9810 return true; 9811 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9812 llvm_unreachable("Expected predefined allocator for the variables with the " 9813 "static storage."); 9814 } 9815 return false; 9816 } 9817 9818 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9819 return HasRequiresUnifiedSharedMemory; 9820 } 9821 9822 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9823 CodeGenModule &CGM) 9824 : CGM(CGM) { 9825 if (CGM.getLangOpts().OpenMPIsDevice) { 9826 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9827 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9828 } 9829 } 9830 9831 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9832 if (CGM.getLangOpts().OpenMPIsDevice) 9833 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9834 } 9835 9836 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9837 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9838 return true; 9839 9840 StringRef Name = CGM.getMangledName(GD); 9841 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9842 // Do not to emit function if it is marked as declare target as it was already 9843 // emitted. 
9844 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9845 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 9846 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 9847 return !F->isDeclaration(); 9848 return false; 9849 } 9850 return true; 9851 } 9852 9853 return !AlreadyEmittedTargetFunctions.insert(Name).second; 9854 } 9855 9856 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9857 // If we don't have entries or if we are emitting code for the device, we 9858 // don't need to do anything. 9859 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9860 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9861 (OffloadEntriesInfoManager.empty() && 9862 !HasEmittedDeclareTargetRegion && 9863 !HasEmittedTargetRegion)) 9864 return nullptr; 9865 9866 // Create and register the function that handles the requires directives. 9867 ASTContext &C = CGM.getContext(); 9868 9869 llvm::Function *RequiresRegFn; 9870 { 9871 CodeGenFunction CGF(CGM); 9872 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 9873 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 9874 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 9875 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 9876 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 9877 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 9878 // TODO: check for other requires clauses. 9879 // The requires directive takes effect only when a target region is 9880 // present in the compilation unit. Otherwise it is ignored and not 9881 // passed to the runtime. This avoids the runtime from throwing an error 9882 // for mismatching requires clauses across compilation units that don't 9883 // contain at least 1 target region. 
9884 assert((HasEmittedTargetRegion || 9885 HasEmittedDeclareTargetRegion || 9886 !OffloadEntriesInfoManager.empty()) && 9887 "Target or declare target region expected."); 9888 if (HasRequiresUnifiedSharedMemory) 9889 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 9890 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 9891 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 9892 CGF.FinishFunction(); 9893 } 9894 return RequiresRegFn; 9895 } 9896 9897 llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() { 9898 // If we have offloading in the current module, we need to emit the entries 9899 // now and register the offloading descriptor. 9900 createOffloadEntriesAndInfoMetadata(); 9901 9902 // Create and register the offloading binary descriptors. This is the main 9903 // entity that captures all the information about offloading in the current 9904 // compilation unit. 9905 return createOffloadingBinaryDescriptorRegistration(); 9906 } 9907 9908 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9909 const OMPExecutableDirective &D, 9910 SourceLocation Loc, 9911 llvm::Function *OutlinedFn, 9912 ArrayRef<llvm::Value *> CapturedVars) { 9913 if (!CGF.HaveInsertPoint()) 9914 return; 9915 9916 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9917 CodeGenFunction::RunCleanupsScope Scope(CGF); 9918 9919 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9920 llvm::Value *Args[] = { 9921 RTLoc, 9922 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9923 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9924 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9925 RealArgs.append(std::begin(Args), std::end(Args)); 9926 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9927 9928 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9929 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9930 } 9931 9932 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9933 const Expr 
*NumTeams, 9934 const Expr *ThreadLimit, 9935 SourceLocation Loc) { 9936 if (!CGF.HaveInsertPoint()) 9937 return; 9938 9939 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9940 9941 llvm::Value *NumTeamsVal = 9942 NumTeams 9943 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9944 CGF.CGM.Int32Ty, /* isSigned = */ true) 9945 : CGF.Builder.getInt32(0); 9946 9947 llvm::Value *ThreadLimitVal = 9948 ThreadLimit 9949 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 9950 CGF.CGM.Int32Ty, /* isSigned = */ true) 9951 : CGF.Builder.getInt32(0); 9952 9953 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 9954 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 9955 ThreadLimitVal}; 9956 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 9957 PushNumTeamsArgs); 9958 } 9959 9960 void CGOpenMPRuntime::emitTargetDataCalls( 9961 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9962 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9963 if (!CGF.HaveInsertPoint()) 9964 return; 9965 9966 // Action used to replace the default codegen action and turn privatization 9967 // off. 9968 PrePostActionTy NoPrivAction; 9969 9970 // Generate the code for the opening of the data environment. Capture all the 9971 // arguments of the runtime call by reference because they are used in the 9972 // closing of the region. 9973 auto &&BeginThenGen = [this, &D, Device, &Info, 9974 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 9975 // Fill up the arrays with all the mapped variables. 9976 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9977 MappableExprsHandler::MapValuesArrayTy Pointers; 9978 MappableExprsHandler::MapValuesArrayTy Sizes; 9979 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9980 9981 // Get map clause information. 
9982 MappableExprsHandler MCHandler(D, CGF); 9983 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9984 9985 // Fill up the arrays and create the arguments. 9986 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9987 9988 llvm::Value *BasePointersArrayArg = nullptr; 9989 llvm::Value *PointersArrayArg = nullptr; 9990 llvm::Value *SizesArrayArg = nullptr; 9991 llvm::Value *MapTypesArrayArg = nullptr; 9992 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 9993 SizesArrayArg, MapTypesArrayArg, Info); 9994 9995 // Emit device ID if any. 9996 llvm::Value *DeviceID = nullptr; 9997 if (Device) { 9998 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9999 CGF.Int64Ty, /*isSigned=*/true); 10000 } else { 10001 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10002 } 10003 10004 // Emit the number of elements in the offloading arrays. 10005 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10006 10007 llvm::Value *OffloadingArgs[] = { 10008 DeviceID, PointerNum, BasePointersArrayArg, 10009 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10010 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin), 10011 OffloadingArgs); 10012 10013 // If device pointer privatization is required, emit the body of the region 10014 // here. It will have to be duplicated: with and without privatization. 10015 if (!Info.CaptureDeviceAddrMap.empty()) 10016 CodeGen(CGF); 10017 }; 10018 10019 // Generate code for the closing of the data region. 
10020 auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF, 10021 PrePostActionTy &) { 10022 assert(Info.isValid() && "Invalid data environment closing arguments."); 10023 10024 llvm::Value *BasePointersArrayArg = nullptr; 10025 llvm::Value *PointersArrayArg = nullptr; 10026 llvm::Value *SizesArrayArg = nullptr; 10027 llvm::Value *MapTypesArrayArg = nullptr; 10028 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, 10029 SizesArrayArg, MapTypesArrayArg, Info); 10030 10031 // Emit device ID if any. 10032 llvm::Value *DeviceID = nullptr; 10033 if (Device) { 10034 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10035 CGF.Int64Ty, /*isSigned=*/true); 10036 } else { 10037 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10038 } 10039 10040 // Emit the number of elements in the offloading arrays. 10041 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); 10042 10043 llvm::Value *OffloadingArgs[] = { 10044 DeviceID, PointerNum, BasePointersArrayArg, 10045 PointersArrayArg, SizesArrayArg, MapTypesArrayArg}; 10046 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end), 10047 OffloadingArgs); 10048 }; 10049 10050 // If we need device pointer privatization, we need to emit the body of the 10051 // region with no privatization in the 'else' branch of the conditional. 10052 // Otherwise, we don't have to do anything. 10053 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, 10054 PrePostActionTy &) { 10055 if (!Info.CaptureDeviceAddrMap.empty()) { 10056 CodeGen.setAction(NoPrivAction); 10057 CodeGen(CGF); 10058 } 10059 }; 10060 10061 // We don't have to do anything to close the region if the if clause evaluates 10062 // to false. 
10063 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; 10064 10065 if (IfCond) { 10066 emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); 10067 } else { 10068 RegionCodeGenTy RCG(BeginThenGen); 10069 RCG(CGF); 10070 } 10071 10072 // If we don't require privatization of device pointers, we emit the body in 10073 // between the runtime calls. This avoids duplicating the body code. 10074 if (Info.CaptureDeviceAddrMap.empty()) { 10075 CodeGen.setAction(NoPrivAction); 10076 CodeGen(CGF); 10077 } 10078 10079 if (IfCond) { 10080 emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen); 10081 } else { 10082 RegionCodeGenTy RCG(EndThenGen); 10083 RCG(CGF); 10084 } 10085 } 10086 10087 void CGOpenMPRuntime::emitTargetDataStandAloneCall( 10088 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 10089 const Expr *Device) { 10090 if (!CGF.HaveInsertPoint()) 10091 return; 10092 10093 assert((isa<OMPTargetEnterDataDirective>(D) || 10094 isa<OMPTargetExitDataDirective>(D) || 10095 isa<OMPTargetUpdateDirective>(D)) && 10096 "Expecting either target enter, exit data, or update directives."); 10097 10098 CodeGenFunction::OMPTargetDataInfo InputInfo; 10099 llvm::Value *MapTypesArray = nullptr; 10100 // Generate the code for the opening of the data environment. 10101 auto &&ThenGen = [this, &D, Device, &InputInfo, 10102 &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) { 10103 // Emit device ID if any. 10104 llvm::Value *DeviceID = nullptr; 10105 if (Device) { 10106 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 10107 CGF.Int64Ty, /*isSigned=*/true); 10108 } else { 10109 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 10110 } 10111 10112 // Emit the number of elements in the offloading arrays. 
10113 llvm::Constant *PointerNum = 10114 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 10115 10116 llvm::Value *OffloadingArgs[] = {DeviceID, 10117 PointerNum, 10118 InputInfo.BasePointersArray.getPointer(), 10119 InputInfo.PointersArray.getPointer(), 10120 InputInfo.SizesArray.getPointer(), 10121 MapTypesArray}; 10122 10123 // Select the right runtime function call for each expected standalone 10124 // directive. 10125 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 10126 OpenMPRTLFunction RTLFn; 10127 switch (D.getDirectiveKind()) { 10128 case OMPD_target_enter_data: 10129 RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait 10130 : OMPRTL__tgt_target_data_begin; 10131 break; 10132 case OMPD_target_exit_data: 10133 RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait 10134 : OMPRTL__tgt_target_data_end; 10135 break; 10136 case OMPD_target_update: 10137 RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait 10138 : OMPRTL__tgt_target_data_update; 10139 break; 10140 case OMPD_parallel: 10141 case OMPD_for: 10142 case OMPD_parallel_for: 10143 case OMPD_parallel_sections: 10144 case OMPD_for_simd: 10145 case OMPD_parallel_for_simd: 10146 case OMPD_cancel: 10147 case OMPD_cancellation_point: 10148 case OMPD_ordered: 10149 case OMPD_threadprivate: 10150 case OMPD_allocate: 10151 case OMPD_task: 10152 case OMPD_simd: 10153 case OMPD_sections: 10154 case OMPD_section: 10155 case OMPD_single: 10156 case OMPD_master: 10157 case OMPD_critical: 10158 case OMPD_taskyield: 10159 case OMPD_barrier: 10160 case OMPD_taskwait: 10161 case OMPD_taskgroup: 10162 case OMPD_atomic: 10163 case OMPD_flush: 10164 case OMPD_teams: 10165 case OMPD_target_data: 10166 case OMPD_distribute: 10167 case OMPD_distribute_simd: 10168 case OMPD_distribute_parallel_for: 10169 case OMPD_distribute_parallel_for_simd: 10170 case OMPD_teams_distribute: 10171 case OMPD_teams_distribute_simd: 10172 case OMPD_teams_distribute_parallel_for: 10173 case 
OMPD_teams_distribute_parallel_for_simd: 10174 case OMPD_declare_simd: 10175 case OMPD_declare_target: 10176 case OMPD_end_declare_target: 10177 case OMPD_declare_reduction: 10178 case OMPD_declare_mapper: 10179 case OMPD_taskloop: 10180 case OMPD_taskloop_simd: 10181 case OMPD_target: 10182 case OMPD_target_simd: 10183 case OMPD_target_teams_distribute: 10184 case OMPD_target_teams_distribute_simd: 10185 case OMPD_target_teams_distribute_parallel_for: 10186 case OMPD_target_teams_distribute_parallel_for_simd: 10187 case OMPD_target_teams: 10188 case OMPD_target_parallel: 10189 case OMPD_target_parallel_for: 10190 case OMPD_target_parallel_for_simd: 10191 case OMPD_requires: 10192 case OMPD_unknown: 10193 llvm_unreachable("Unexpected standalone target data directive."); 10194 break; 10195 } 10196 CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs); 10197 }; 10198 10199 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray]( 10200 CodeGenFunction &CGF, PrePostActionTy &) { 10201 // Fill up the arrays with all the mapped variables. 10202 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 10203 MappableExprsHandler::MapValuesArrayTy Pointers; 10204 MappableExprsHandler::MapValuesArrayTy Sizes; 10205 MappableExprsHandler::MapFlagsArrayTy MapTypes; 10206 10207 // Get map clause information. 10208 MappableExprsHandler MEHandler(D, CGF); 10209 MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 10210 10211 TargetDataInfo Info; 10212 // Fill up the arrays and create the arguments. 
10213 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10214 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10215 Info.PointersArray, Info.SizesArray, 10216 Info.MapTypesArray, Info); 10217 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10218 InputInfo.BasePointersArray = 10219 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10220 InputInfo.PointersArray = 10221 Address(Info.PointersArray, CGM.getPointerAlign()); 10222 InputInfo.SizesArray = 10223 Address(Info.SizesArray, CGM.getPointerAlign()); 10224 MapTypesArray = Info.MapTypesArray; 10225 if (D.hasClausesOfKind<OMPDependClause>()) 10226 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10227 else 10228 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10229 }; 10230 10231 if (IfCond) { 10232 emitOMPIfClause(CGF, IfCond, TargetThenGen, 10233 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10234 } else { 10235 RegionCodeGenTy ThenRCG(TargetThenGen); 10236 ThenRCG(CGF); 10237 } 10238 } 10239 10240 namespace { 10241 /// Kind of parameter in a function with 'declare simd' directive. 10242 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10243 /// Attribute set of the parameter. 10244 struct ParamAttrTy { 10245 ParamKindTy Kind = Vector; 10246 llvm::APSInt StrideOrArg; 10247 llvm::APSInt Alignment; 10248 }; 10249 } // namespace 10250 10251 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10252 ArrayRef<ParamAttrTy> ParamAttrs) { 10253 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10254 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10255 // of that clause. The VLEN value must be power of 2. 10256 // In other case the notion of the function`s "characteristic data type" (CDT) 10257 // is used to compute the vector length. 10258 // CDT is defined in the following order: 10259 // a) For non-void function, the CDT is the return type. 
10260 // b) If the function has any non-uniform, non-linear parameters, then the 10261 // CDT is the type of the first such parameter. 10262 // c) If the CDT determined by a) or b) above is struct, union, or class 10263 // type which is pass-by-value (except for the type that maps to the 10264 // built-in complex data type), the characteristic data type is int. 10265 // d) If none of the above three cases is applicable, the CDT is int. 10266 // The VLEN is then determined based on the CDT and the size of vector 10267 // register of that ISA for which current vector version is generated. The 10268 // VLEN is computed using the formula below: 10269 // VLEN = sizeof(vector_register) / sizeof(CDT), 10270 // where vector register size specified in section 3.2.1 Registers and the 10271 // Stack Frame of original AMD64 ABI document. 10272 QualType RetType = FD->getReturnType(); 10273 if (RetType.isNull()) 10274 return 0; 10275 ASTContext &C = FD->getASTContext(); 10276 QualType CDT; 10277 if (!RetType.isNull() && !RetType->isVoidType()) { 10278 CDT = RetType; 10279 } else { 10280 unsigned Offset = 0; 10281 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10282 if (ParamAttrs[Offset].Kind == Vector) 10283 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10284 ++Offset; 10285 } 10286 if (CDT.isNull()) { 10287 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10288 if (ParamAttrs[I + Offset].Kind == Vector) { 10289 CDT = FD->getParamDecl(I)->getType(); 10290 break; 10291 } 10292 } 10293 } 10294 } 10295 if (CDT.isNull()) 10296 CDT = C.IntTy; 10297 CDT = CDT->getCanonicalTypeUnqualified(); 10298 if (CDT->isRecordType() || CDT->isUnionType()) 10299 CDT = C.IntTy; 10300 return C.getTypeSize(CDT); 10301 } 10302 10303 static void 10304 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10305 const llvm::APSInt &VLENVal, 10306 ArrayRef<ParamAttrTy> ParamAttrs, 10307 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10308 struct ISADataTy { 
10309 char ISA; 10310 unsigned VecRegSize; 10311 }; 10312 ISADataTy ISAData[] = { 10313 { 10314 'b', 128 10315 }, // SSE 10316 { 10317 'c', 256 10318 }, // AVX 10319 { 10320 'd', 256 10321 }, // AVX2 10322 { 10323 'e', 512 10324 }, // AVX512 10325 }; 10326 llvm::SmallVector<char, 2> Masked; 10327 switch (State) { 10328 case OMPDeclareSimdDeclAttr::BS_Undefined: 10329 Masked.push_back('N'); 10330 Masked.push_back('M'); 10331 break; 10332 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10333 Masked.push_back('N'); 10334 break; 10335 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10336 Masked.push_back('M'); 10337 break; 10338 } 10339 for (char Mask : Masked) { 10340 for (const ISADataTy &Data : ISAData) { 10341 SmallString<256> Buffer; 10342 llvm::raw_svector_ostream Out(Buffer); 10343 Out << "_ZGV" << Data.ISA << Mask; 10344 if (!VLENVal) { 10345 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10346 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10347 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10348 } else { 10349 Out << VLENVal; 10350 } 10351 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10352 switch (ParamAttr.Kind){ 10353 case LinearWithVarStride: 10354 Out << 's' << ParamAttr.StrideOrArg; 10355 break; 10356 case Linear: 10357 Out << 'l'; 10358 if (!!ParamAttr.StrideOrArg) 10359 Out << ParamAttr.StrideOrArg; 10360 break; 10361 case Uniform: 10362 Out << 'u'; 10363 break; 10364 case Vector: 10365 Out << 'v'; 10366 break; 10367 } 10368 if (!!ParamAttr.Alignment) 10369 Out << 'a' << ParamAttr.Alignment; 10370 } 10371 Out << '_' << Fn->getName(); 10372 Fn->addFnAttr(Out.str()); 10373 } 10374 } 10375 } 10376 10377 // This are the Functions that are needed to mangle the name of the 10378 // vector functions generated by the compiler, according to the rules 10379 // defined in the "Vector Function ABI specifications for AArch64", 10380 // available at 10381 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10382 10383 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10384 /// 10385 /// TODO: Need to implement the behavior for reference marked with a 10386 /// var or no linear modifiers (1.b in the section). For this, we 10387 /// need to extend ParamKindTy to support the linear modifiers. 10388 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10389 QT = QT.getCanonicalType(); 10390 10391 if (QT->isVoidType()) 10392 return false; 10393 10394 if (Kind == ParamKindTy::Uniform) 10395 return false; 10396 10397 if (Kind == ParamKindTy::Linear) 10398 return false; 10399 10400 // TODO: Handle linear references with modifiers 10401 10402 if (Kind == ParamKindTy::LinearWithVarStride) 10403 return false; 10404 10405 return true; 10406 } 10407 10408 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10409 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10410 QT = QT.getCanonicalType(); 10411 unsigned Size = C.getTypeSize(QT); 10412 10413 // Only scalars and complex within 16 bytes wide set PVB to true. 10414 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10415 return false; 10416 10417 if (QT->isFloatingType()) 10418 return true; 10419 10420 if (QT->isIntegerType()) 10421 return true; 10422 10423 if (QT->isPointerType()) 10424 return true; 10425 10426 // TODO: Add support for complex types (section 3.1.2, item 2). 10427 10428 return false; 10429 } 10430 10431 /// Computes the lane size (LS) of a return type or of an input parameter, 10432 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10433 /// TODO: Add support for references, section 3.2.1, item 1. 
10434 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10435 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10436 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10437 if (getAArch64PBV(PTy, C)) 10438 return C.getTypeSize(PTy); 10439 } 10440 if (getAArch64PBV(QT, C)) 10441 return C.getTypeSize(QT); 10442 10443 return C.getTypeSize(C.getUIntPtrType()); 10444 } 10445 10446 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10447 // signature of the scalar function, as defined in 3.2.2 of the 10448 // AAVFABI. 10449 static std::tuple<unsigned, unsigned, bool> 10450 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10451 QualType RetType = FD->getReturnType().getCanonicalType(); 10452 10453 ASTContext &C = FD->getASTContext(); 10454 10455 bool OutputBecomesInput = false; 10456 10457 llvm::SmallVector<unsigned, 8> Sizes; 10458 if (!RetType->isVoidType()) { 10459 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10460 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10461 OutputBecomesInput = true; 10462 } 10463 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10464 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10465 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10466 } 10467 10468 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10469 // The LS of a function parameter / return value can only be a power 10470 // of 2, starting from 8 bits, up to 128. 
10471 assert(std::all_of(Sizes.begin(), Sizes.end(), 10472 [](unsigned Size) { 10473 return Size == 8 || Size == 16 || Size == 32 || 10474 Size == 64 || Size == 128; 10475 }) && 10476 "Invalid size"); 10477 10478 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10479 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10480 OutputBecomesInput); 10481 } 10482 10483 /// Mangle the parameter part of the vector function name according to 10484 /// their OpenMP classification. The mangling function is defined in 10485 /// section 3.5 of the AAVFABI. 10486 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10487 SmallString<256> Buffer; 10488 llvm::raw_svector_ostream Out(Buffer); 10489 for (const auto &ParamAttr : ParamAttrs) { 10490 switch (ParamAttr.Kind) { 10491 case LinearWithVarStride: 10492 Out << "ls" << ParamAttr.StrideOrArg; 10493 break; 10494 case Linear: 10495 Out << 'l'; 10496 // Don't print the step value if it is not present or if it is 10497 // equal to 1. 10498 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10499 Out << ParamAttr.StrideOrArg; 10500 break; 10501 case Uniform: 10502 Out << 'u'; 10503 break; 10504 case Vector: 10505 Out << 'v'; 10506 break; 10507 } 10508 10509 if (!!ParamAttr.Alignment) 10510 Out << 'a' << ParamAttr.Alignment; 10511 } 10512 10513 return Out.str(); 10514 } 10515 10516 // Function used to add the attribute. The parameter `VLEN` is 10517 // templated to allow the use of "x" when targeting scalable functions 10518 // for SVE. 
10519 template <typename T> 10520 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10521 char ISA, StringRef ParSeq, 10522 StringRef MangledName, bool OutputBecomesInput, 10523 llvm::Function *Fn) { 10524 SmallString<256> Buffer; 10525 llvm::raw_svector_ostream Out(Buffer); 10526 Out << Prefix << ISA << LMask << VLEN; 10527 if (OutputBecomesInput) 10528 Out << "v"; 10529 Out << ParSeq << "_" << MangledName; 10530 Fn->addFnAttr(Out.str()); 10531 } 10532 10533 // Helper function to generate the Advanced SIMD names depending on 10534 // the value of the NDS when simdlen is not present. 10535 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10536 StringRef Prefix, char ISA, 10537 StringRef ParSeq, StringRef MangledName, 10538 bool OutputBecomesInput, 10539 llvm::Function *Fn) { 10540 switch (NDS) { 10541 case 8: 10542 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10543 OutputBecomesInput, Fn); 10544 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10545 OutputBecomesInput, Fn); 10546 break; 10547 case 16: 10548 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10549 OutputBecomesInput, Fn); 10550 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10551 OutputBecomesInput, Fn); 10552 break; 10553 case 32: 10554 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10555 OutputBecomesInput, Fn); 10556 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10557 OutputBecomesInput, Fn); 10558 break; 10559 case 64: 10560 case 128: 10561 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10562 OutputBecomesInput, Fn); 10563 break; 10564 default: 10565 llvm_unreachable("Scalar type is too wide."); 10566 } 10567 } 10568 10569 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10570 static void emitAArch64DeclareSimdFunction( 10571 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10572 ArrayRef<ParamAttrTy> ParamAttrs, 10573 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10574 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10575 10576 // Get basic data for building the vector signature. 10577 const auto Data = getNDSWDS(FD, ParamAttrs); 10578 const unsigned NDS = std::get<0>(Data); 10579 const unsigned WDS = std::get<1>(Data); 10580 const bool OutputBecomesInput = std::get<2>(Data); 10581 10582 // Check the values provided via `simdlen` by the user. 10583 // 1. A `simdlen(1)` doesn't produce vector signatures, 10584 if (UserVLEN == 1) { 10585 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10586 DiagnosticsEngine::Warning, 10587 "The clause simdlen(1) has no effect when targeting aarch64."); 10588 CGM.getDiags().Report(SLoc, DiagID); 10589 return; 10590 } 10591 10592 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10593 // Advanced SIMD output. 10594 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10595 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10596 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10597 "power of 2 when targeting Advanced SIMD."); 10598 CGM.getDiags().Report(SLoc, DiagID); 10599 return; 10600 } 10601 10602 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10603 // limits. 10604 if (ISA == 's' && UserVLEN != 0) { 10605 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10606 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10607 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10608 "lanes in the architectural constraints " 10609 "for SVE (min is 128-bit, max is " 10610 "2048-bit, by steps of 128-bit)"); 10611 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10612 return; 10613 } 10614 } 10615 10616 // Sort out parameter sequence. 
10617 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10618 StringRef Prefix = "_ZGV"; 10619 // Generate simdlen from user input (if any). 10620 if (UserVLEN) { 10621 if (ISA == 's') { 10622 // SVE generates only a masked function. 10623 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10624 OutputBecomesInput, Fn); 10625 } else { 10626 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10627 // Advanced SIMD generates one or two functions, depending on 10628 // the `[not]inbranch` clause. 10629 switch (State) { 10630 case OMPDeclareSimdDeclAttr::BS_Undefined: 10631 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10632 OutputBecomesInput, Fn); 10633 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10634 OutputBecomesInput, Fn); 10635 break; 10636 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10637 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10638 OutputBecomesInput, Fn); 10639 break; 10640 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10641 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10642 OutputBecomesInput, Fn); 10643 break; 10644 } 10645 } 10646 } else { 10647 // If no user simdlen is provided, follow the AAVFABI rules for 10648 // generating the vector length. 10649 if (ISA == 's') { 10650 // SVE, section 3.4.1, item 1. 10651 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10652 OutputBecomesInput, Fn); 10653 } else { 10654 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10655 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10656 // two vector names depending on the use of the clause 10657 // `[not]inbranch`. 
10658 switch (State) { 10659 case OMPDeclareSimdDeclAttr::BS_Undefined: 10660 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10661 OutputBecomesInput, Fn); 10662 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10663 OutputBecomesInput, Fn); 10664 break; 10665 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10666 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10667 OutputBecomesInput, Fn); 10668 break; 10669 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10670 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10671 OutputBecomesInput, Fn); 10672 break; 10673 } 10674 } 10675 } 10676 } 10677 10678 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10679 llvm::Function *Fn) { 10680 ASTContext &C = CGM.getContext(); 10681 FD = FD->getMostRecentDecl(); 10682 // Map params to their positions in function decl. 10683 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10684 if (isa<CXXMethodDecl>(FD)) 10685 ParamPositions.try_emplace(FD, 0); 10686 unsigned ParamPos = ParamPositions.size(); 10687 for (const ParmVarDecl *P : FD->parameters()) { 10688 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10689 ++ParamPos; 10690 } 10691 while (FD) { 10692 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10693 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10694 // Mark uniform parameters. 10695 for (const Expr *E : Attr->uniforms()) { 10696 E = E->IgnoreParenImpCasts(); 10697 unsigned Pos; 10698 if (isa<CXXThisExpr>(E)) { 10699 Pos = ParamPositions[FD]; 10700 } else { 10701 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10702 ->getCanonicalDecl(); 10703 Pos = ParamPositions[PVD]; 10704 } 10705 ParamAttrs[Pos].Kind = Uniform; 10706 } 10707 // Get alignment info. 
10708 auto NI = Attr->alignments_begin(); 10709 for (const Expr *E : Attr->aligneds()) { 10710 E = E->IgnoreParenImpCasts(); 10711 unsigned Pos; 10712 QualType ParmTy; 10713 if (isa<CXXThisExpr>(E)) { 10714 Pos = ParamPositions[FD]; 10715 ParmTy = E->getType(); 10716 } else { 10717 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10718 ->getCanonicalDecl(); 10719 Pos = ParamPositions[PVD]; 10720 ParmTy = PVD->getType(); 10721 } 10722 ParamAttrs[Pos].Alignment = 10723 (*NI) 10724 ? (*NI)->EvaluateKnownConstInt(C) 10725 : llvm::APSInt::getUnsigned( 10726 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10727 .getQuantity()); 10728 ++NI; 10729 } 10730 // Mark linear parameters. 10731 auto SI = Attr->steps_begin(); 10732 auto MI = Attr->modifiers_begin(); 10733 for (const Expr *E : Attr->linears()) { 10734 E = E->IgnoreParenImpCasts(); 10735 unsigned Pos; 10736 if (isa<CXXThisExpr>(E)) { 10737 Pos = ParamPositions[FD]; 10738 } else { 10739 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10740 ->getCanonicalDecl(); 10741 Pos = ParamPositions[PVD]; 10742 } 10743 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10744 ParamAttr.Kind = Linear; 10745 if (*SI) { 10746 Expr::EvalResult Result; 10747 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10748 if (const auto *DRE = 10749 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10750 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10751 ParamAttr.Kind = LinearWithVarStride; 10752 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10753 ParamPositions[StridePVD->getCanonicalDecl()]); 10754 } 10755 } 10756 } else { 10757 ParamAttr.StrideOrArg = Result.Val.getInt(); 10758 } 10759 } 10760 ++SI; 10761 ++MI; 10762 } 10763 llvm::APSInt VLENVal; 10764 SourceLocation ExprLoc; 10765 const Expr *VLENExpr = Attr->getSimdlen(); 10766 if (VLENExpr) { 10767 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10768 ExprLoc = VLENExpr->getExprLoc(); 10769 } 10770 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10771 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10772 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10773 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10774 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10775 unsigned VLEN = VLENVal.getExtValue(); 10776 StringRef MangledName = Fn->getName(); 10777 if (CGM.getTarget().hasFeature("sve")) 10778 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10779 MangledName, 's', 128, Fn, ExprLoc); 10780 if (CGM.getTarget().hasFeature("neon")) 10781 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10782 MangledName, 'n', 128, Fn, ExprLoc); 10783 } 10784 } 10785 FD = FD->getPreviousDecl(); 10786 } 10787 } 10788 10789 namespace { 10790 /// Cleanup action for doacross support. 10791 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10792 public: 10793 static const int DoacrossFinArgs = 2; 10794 10795 private: 10796 llvm::FunctionCallee RTLFn; 10797 llvm::Value *Args[DoacrossFinArgs]; 10798 10799 public: 10800 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10801 ArrayRef<llvm::Value *> CallArgs) 10802 : RTLFn(RTLFn) { 10803 assert(CallArgs.size() == DoacrossFinArgs); 10804 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10805 } 10806 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10807 if (!CGF.HaveInsertPoint()) 10808 return; 10809 CGF.EmitRuntimeCall(RTLFn, Args); 10810 } 10811 }; 10812 } // namespace 10813 10814 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10815 const OMPLoopDirective &D, 10816 ArrayRef<Expr *> NumIterations) { 10817 if (!CGF.HaveInsertPoint()) 10818 return; 10819 10820 ASTContext &C = CGM.getContext(); 10821 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10822 RecordDecl *RD; 10823 if (KmpDimTy.isNull()) { 10824 // Build struct kmp_dim { // loop bounds info casted to 
kmp_int64 10825 // kmp_int64 lo; // lower 10826 // kmp_int64 up; // upper 10827 // kmp_int64 st; // stride 10828 // }; 10829 RD = C.buildImplicitRecord("kmp_dim"); 10830 RD->startDefinition(); 10831 addFieldToRecordDecl(C, RD, Int64Ty); 10832 addFieldToRecordDecl(C, RD, Int64Ty); 10833 addFieldToRecordDecl(C, RD, Int64Ty); 10834 RD->completeDefinition(); 10835 KmpDimTy = C.getRecordType(RD); 10836 } else { 10837 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10838 } 10839 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10840 QualType ArrayTy = 10841 C.getConstantArrayType(KmpDimTy, Size, ArrayType::Normal, 0); 10842 10843 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10844 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10845 enum { LowerFD = 0, UpperFD, StrideFD }; 10846 // Fill dims with data. 10847 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10848 LValue DimsLVal = CGF.MakeAddrLValue( 10849 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10850 // dims.upper = num_iterations; 10851 LValue UpperLVal = CGF.EmitLValueForField( 10852 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10853 llvm::Value *NumIterVal = 10854 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10855 D.getNumIterations()->getType(), Int64Ty, 10856 D.getNumIterations()->getExprLoc()); 10857 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10858 // dims.stride = 1; 10859 LValue StrideLVal = CGF.EmitLValueForField( 10860 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10861 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10862 StrideLVal); 10863 } 10864 10865 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10866 // kmp_int32 num_dims, struct kmp_dim * dims); 10867 llvm::Value *Args[] = { 10868 emitUpdateLocation(CGF, D.getBeginLoc()), 10869 getThreadID(CGF, D.getBeginLoc()), 10870 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10871 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10872 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10873 CGM.VoidPtrTy)}; 10874 10875 llvm::FunctionCallee RTLFn = 10876 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10877 CGF.EmitRuntimeCall(RTLFn, Args); 10878 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10879 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10880 llvm::FunctionCallee FiniRTLFn = 10881 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10882 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10883 llvm::makeArrayRef(FiniArgs)); 10884 } 10885 10886 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10887 const OMPDependClause *C) { 10888 QualType Int64Ty = 10889 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10890 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10891 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10892 Int64Ty, Size, ArrayType::Normal, 0); 10893 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10894 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10895 const Expr *CounterVal = C->getLoopData(I); 10896 assert(CounterVal); 10897 llvm::Value *CntVal = CGF.EmitScalarConversion( 10898 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10899 CounterVal->getExprLoc()); 10900 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10901 /*Volatile=*/false, Int64Ty); 10902 } 10903 llvm::Value *Args[] = { 10904 emitUpdateLocation(CGF, C->getBeginLoc()), 10905 getThreadID(CGF, C->getBeginLoc()), 10906 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10907 llvm::FunctionCallee RTLFn; 10908 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10909 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10910 } else { 10911 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10912 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 10913 } 
10914 CGF.EmitRuntimeCall(RTLFn, Args); 10915 } 10916 10917 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10918 llvm::FunctionCallee Callee, 10919 ArrayRef<llvm::Value *> Args) const { 10920 assert(Loc.isValid() && "Outlined function call location must be valid."); 10921 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10922 10923 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10924 if (Fn->doesNotThrow()) { 10925 CGF.EmitNounwindRuntimeCall(Fn, Args); 10926 return; 10927 } 10928 } 10929 CGF.EmitRuntimeCall(Callee, Args); 10930 } 10931 10932 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10933 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10934 ArrayRef<llvm::Value *> Args) const { 10935 emitCall(CGF, Loc, OutlinedFn, Args); 10936 } 10937 10938 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 10939 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 10940 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 10941 HasEmittedDeclareTargetRegion = true; 10942 } 10943 10944 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10945 const VarDecl *NativeParam, 10946 const VarDecl *TargetParam) const { 10947 return CGF.GetAddrOfLocalVar(NativeParam); 10948 } 10949 10950 namespace { 10951 /// Cleanup action for allocate support. 
10952 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10953 public: 10954 static const int CleanupArgs = 3; 10955 10956 private: 10957 llvm::FunctionCallee RTLFn; 10958 llvm::Value *Args[CleanupArgs]; 10959 10960 public: 10961 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 10962 ArrayRef<llvm::Value *> CallArgs) 10963 : RTLFn(RTLFn) { 10964 assert(CallArgs.size() == CleanupArgs && 10965 "Size of arguments does not match."); 10966 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10967 } 10968 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10969 if (!CGF.HaveInsertPoint()) 10970 return; 10971 CGF.EmitRuntimeCall(RTLFn, Args); 10972 } 10973 }; 10974 } // namespace 10975 10976 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 10977 const VarDecl *VD) { 10978 if (!VD) 10979 return Address::invalid(); 10980 const VarDecl *CVD = VD->getCanonicalDecl(); 10981 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 10982 return Address::invalid(); 10983 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 10984 // Use the default allocation. 
10985 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 10986 !AA->getAllocator()) 10987 return Address::invalid(); 10988 llvm::Value *Size; 10989 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 10990 if (CVD->getType()->isVariablyModifiedType()) { 10991 Size = CGF.getTypeSize(CVD->getType()); 10992 // Align the size: ((size + align - 1) / align) * align 10993 Size = CGF.Builder.CreateNUWAdd( 10994 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 10995 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 10996 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 10997 } else { 10998 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 10999 Size = CGM.getSize(Sz.alignTo(Align)); 11000 } 11001 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 11002 assert(AA->getAllocator() && 11003 "Expected allocator expression for non-default allocator."); 11004 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 11005 // According to the standard, the original allocator type is a enum (integer). 11006 // Convert to pointer type, if required. 
11007 if (Allocator->getType()->isIntegerTy()) 11008 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 11009 else if (Allocator->getType()->isPointerTy()) 11010 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 11011 CGM.VoidPtrTy); 11012 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 11013 11014 llvm::Value *Addr = 11015 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11016 CVD->getName() + ".void.addr"); 11017 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11018 Allocator}; 11019 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11020 11021 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11022 llvm::makeArrayRef(FiniArgs)); 11023 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11024 Addr, 11025 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11026 CVD->getName() + ".addr"); 11027 return Address(Addr, Align); 11028 } 11029 11030 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11031 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11032 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11033 llvm_unreachable("Not supported in SIMD-only mode"); 11034 } 11035 11036 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11037 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11038 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11039 llvm_unreachable("Not supported in SIMD-only mode"); 11040 } 11041 11042 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11043 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11044 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11045 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11046 bool Tied, unsigned &NumberOfParts) { 11047 llvm_unreachable("Not supported in SIMD-only mode"); 11048 } 11049 11050 void 
CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11051 SourceLocation Loc, 11052 llvm::Function *OutlinedFn, 11053 ArrayRef<llvm::Value *> CapturedVars, 11054 const Expr *IfCond) { 11055 llvm_unreachable("Not supported in SIMD-only mode"); 11056 } 11057 11058 void CGOpenMPSIMDRuntime::emitCriticalRegion( 11059 CodeGenFunction &CGF, StringRef CriticalName, 11060 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11061 const Expr *Hint) { 11062 llvm_unreachable("Not supported in SIMD-only mode"); 11063 } 11064 11065 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11066 const RegionCodeGenTy &MasterOpGen, 11067 SourceLocation Loc) { 11068 llvm_unreachable("Not supported in SIMD-only mode"); 11069 } 11070 11071 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11072 SourceLocation Loc) { 11073 llvm_unreachable("Not supported in SIMD-only mode"); 11074 } 11075 11076 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11077 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11078 SourceLocation Loc) { 11079 llvm_unreachable("Not supported in SIMD-only mode"); 11080 } 11081 11082 void CGOpenMPSIMDRuntime::emitSingleRegion( 11083 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11084 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11085 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11086 ArrayRef<const Expr *> AssignmentOps) { 11087 llvm_unreachable("Not supported in SIMD-only mode"); 11088 } 11089 11090 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11091 const RegionCodeGenTy &OrderedOpGen, 11092 SourceLocation Loc, 11093 bool IsThreads) { 11094 llvm_unreachable("Not supported in SIMD-only mode"); 11095 } 11096 11097 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11098 SourceLocation Loc, 11099 OpenMPDirectiveKind Kind, 11100 bool EmitChecks, 11101 bool ForceSimpleCall) { 11102 llvm_unreachable("Not supported in SIMD-only mode"); 11103 } 
11104 11105 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11106 CodeGenFunction &CGF, SourceLocation Loc, 11107 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11108 bool Ordered, const DispatchRTInput &DispatchValues) { 11109 llvm_unreachable("Not supported in SIMD-only mode"); 11110 } 11111 11112 void CGOpenMPSIMDRuntime::emitForStaticInit( 11113 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11114 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11115 llvm_unreachable("Not supported in SIMD-only mode"); 11116 } 11117 11118 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11119 CodeGenFunction &CGF, SourceLocation Loc, 11120 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 11121 llvm_unreachable("Not supported in SIMD-only mode"); 11122 } 11123 11124 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11125 SourceLocation Loc, 11126 unsigned IVSize, 11127 bool IVSigned) { 11128 llvm_unreachable("Not supported in SIMD-only mode"); 11129 } 11130 11131 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11132 SourceLocation Loc, 11133 OpenMPDirectiveKind DKind) { 11134 llvm_unreachable("Not supported in SIMD-only mode"); 11135 } 11136 11137 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11138 SourceLocation Loc, 11139 unsigned IVSize, bool IVSigned, 11140 Address IL, Address LB, 11141 Address UB, Address ST) { 11142 llvm_unreachable("Not supported in SIMD-only mode"); 11143 } 11144 11145 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11146 llvm::Value *NumThreads, 11147 SourceLocation Loc) { 11148 llvm_unreachable("Not supported in SIMD-only mode"); 11149 } 11150 11151 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11152 OpenMPProcBindClauseKind ProcBind, 11153 SourceLocation Loc) { 11154 llvm_unreachable("Not supported in SIMD-only mode"); 11155 } 11156 11157 Address 
CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11158 const VarDecl *VD, 11159 Address VDAddr, 11160 SourceLocation Loc) { 11161 llvm_unreachable("Not supported in SIMD-only mode"); 11162 } 11163 11164 llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11165 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11166 CodeGenFunction *CGF) { 11167 llvm_unreachable("Not supported in SIMD-only mode"); 11168 } 11169 11170 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11171 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11172 llvm_unreachable("Not supported in SIMD-only mode"); 11173 } 11174 11175 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11176 ArrayRef<const Expr *> Vars, 11177 SourceLocation Loc) { 11178 llvm_unreachable("Not supported in SIMD-only mode"); 11179 } 11180 11181 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11182 const OMPExecutableDirective &D, 11183 llvm::Function *TaskFunction, 11184 QualType SharedsTy, Address Shareds, 11185 const Expr *IfCond, 11186 const OMPTaskDataTy &Data) { 11187 llvm_unreachable("Not supported in SIMD-only mode"); 11188 } 11189 11190 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11191 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11192 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11193 const Expr *IfCond, const OMPTaskDataTy &Data) { 11194 llvm_unreachable("Not supported in SIMD-only mode"); 11195 } 11196 11197 void CGOpenMPSIMDRuntime::emitReduction( 11198 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11199 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11200 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11201 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11202 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11203 ReductionOps, 
Options); 11204 } 11205 11206 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11207 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11208 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11209 llvm_unreachable("Not supported in SIMD-only mode"); 11210 } 11211 11212 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11213 SourceLocation Loc, 11214 ReductionCodeGen &RCG, 11215 unsigned N) { 11216 llvm_unreachable("Not supported in SIMD-only mode"); 11217 } 11218 11219 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11220 SourceLocation Loc, 11221 llvm::Value *ReductionsPtr, 11222 LValue SharedLVal) { 11223 llvm_unreachable("Not supported in SIMD-only mode"); 11224 } 11225 11226 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11227 SourceLocation Loc) { 11228 llvm_unreachable("Not supported in SIMD-only mode"); 11229 } 11230 11231 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11232 CodeGenFunction &CGF, SourceLocation Loc, 11233 OpenMPDirectiveKind CancelRegion) { 11234 llvm_unreachable("Not supported in SIMD-only mode"); 11235 } 11236 11237 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11238 SourceLocation Loc, const Expr *IfCond, 11239 OpenMPDirectiveKind CancelRegion) { 11240 llvm_unreachable("Not supported in SIMD-only mode"); 11241 } 11242 11243 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11244 const OMPExecutableDirective &D, StringRef ParentName, 11245 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11246 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11247 llvm_unreachable("Not supported in SIMD-only mode"); 11248 } 11249 11250 void CGOpenMPSIMDRuntime::emitTargetCall(CodeGenFunction &CGF, 11251 const OMPExecutableDirective &D, 11252 llvm::Function *OutlinedFn, 11253 llvm::Value *OutlinedFnID, 11254 const Expr *IfCond, 11255 const Expr *Device) { 11256 llvm_unreachable("Not supported in SIMD-only 
mode"); 11257 } 11258 11259 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 11260 llvm_unreachable("Not supported in SIMD-only mode"); 11261 } 11262 11263 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 11264 llvm_unreachable("Not supported in SIMD-only mode"); 11265 } 11266 11267 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 11268 return false; 11269 } 11270 11271 llvm::Function *CGOpenMPSIMDRuntime::emitRegistrationFunction() { 11272 return nullptr; 11273 } 11274 11275 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 11276 const OMPExecutableDirective &D, 11277 SourceLocation Loc, 11278 llvm::Function *OutlinedFn, 11279 ArrayRef<llvm::Value *> CapturedVars) { 11280 llvm_unreachable("Not supported in SIMD-only mode"); 11281 } 11282 11283 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11284 const Expr *NumTeams, 11285 const Expr *ThreadLimit, 11286 SourceLocation Loc) { 11287 llvm_unreachable("Not supported in SIMD-only mode"); 11288 } 11289 11290 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 11291 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11292 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11293 llvm_unreachable("Not supported in SIMD-only mode"); 11294 } 11295 11296 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 11297 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11298 const Expr *Device) { 11299 llvm_unreachable("Not supported in SIMD-only mode"); 11300 } 11301 11302 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11303 const OMPLoopDirective &D, 11304 ArrayRef<Expr *> NumIterations) { 11305 llvm_unreachable("Not supported in SIMD-only mode"); 11306 } 11307 11308 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11309 const OMPDependClause *C) { 11310 llvm_unreachable("Not supported in SIMD-only mode"); 11311 } 11312 11313 const VarDecl * 11314 
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 11315 const VarDecl *NativeParam) const { 11316 llvm_unreachable("Not supported in SIMD-only mode"); 11317 } 11318 11319 Address 11320 CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 11321 const VarDecl *NativeParam, 11322 const VarDecl *TargetParam) const { 11323 llvm_unreachable("Not supported in SIMD-only mode"); 11324 } 11325