1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This provides a class for OpenMP runtime code generation. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "CGCXXABI.h" 14 #include "CGCleanup.h" 15 #include "CGOpenMPRuntime.h" 16 #include "CGRecordLayout.h" 17 #include "CodeGenFunction.h" 18 #include "clang/CodeGen/ConstantInitBuilder.h" 19 #include "clang/AST/Decl.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "clang/Basic/BitmaskEnum.h" 22 #include "llvm/ADT/ArrayRef.h" 23 #include "llvm/Bitcode/BitcodeReader.h" 24 #include "llvm/IR/DerivedTypes.h" 25 #include "llvm/IR/GlobalValue.h" 26 #include "llvm/IR/Value.h" 27 #include "llvm/Support/Format.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 31 using namespace clang; 32 using namespace CodeGen; 33 34 namespace { 35 /// Base class for handling code generation inside OpenMP regions. 36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { 37 public: 38 /// Kinds of OpenMP regions used in codegen. 39 enum CGOpenMPRegionKind { 40 /// Region with outlined function for standalone 'parallel' 41 /// directive. 42 ParallelOutlinedRegion, 43 /// Region with outlined function for standalone 'task' directive. 44 TaskOutlinedRegion, 45 /// Region for constructs that do not require function outlining, 46 /// like 'for', 'sections', 'atomic' etc. directives. 47 InlinedRegion, 48 /// Region with outlined function for standalone 'target' directive. 
49 TargetRegion, 50 }; 51 52 CGOpenMPRegionInfo(const CapturedStmt &CS, 53 const CGOpenMPRegionKind RegionKind, 54 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 55 bool HasCancel) 56 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), 57 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} 58 59 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, 60 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, 61 bool HasCancel) 62 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), 63 Kind(Kind), HasCancel(HasCancel) {} 64 65 /// Get a variable or parameter for storing global thread id 66 /// inside OpenMP construct. 67 virtual const VarDecl *getThreadIDVariable() const = 0; 68 69 /// Emit the captured statement body. 70 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; 71 72 /// Get an LValue for the current ThreadID variable. 73 /// \return LValue for thread id variable. This LValue always has type int32*. 74 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); 75 76 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} 77 78 CGOpenMPRegionKind getRegionKind() const { return RegionKind; } 79 80 OpenMPDirectiveKind getDirectiveKind() const { return Kind; } 81 82 bool hasCancel() const { return HasCancel; } 83 84 static bool classof(const CGCapturedStmtInfo *Info) { 85 return Info->getKind() == CR_OpenMP; 86 } 87 88 ~CGOpenMPRegionInfo() override = default; 89 90 protected: 91 CGOpenMPRegionKind RegionKind; 92 RegionCodeGenTy CodeGen; 93 OpenMPDirectiveKind Kind; 94 bool HasCancel; 95 }; 96 97 /// API for captured statement code generation in OpenMP constructs. 
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { 99 public: 100 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, 101 const RegionCodeGenTy &CodeGen, 102 OpenMPDirectiveKind Kind, bool HasCancel, 103 StringRef HelperName) 104 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, 105 HasCancel), 106 ThreadIDVar(ThreadIDVar), HelperName(HelperName) { 107 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 108 } 109 110 /// Get a variable or parameter for storing global thread id 111 /// inside OpenMP construct. 112 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 113 114 /// Get the name of the capture helper. 115 StringRef getHelperName() const override { return HelperName; } 116 117 static bool classof(const CGCapturedStmtInfo *Info) { 118 return CGOpenMPRegionInfo::classof(Info) && 119 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 120 ParallelOutlinedRegion; 121 } 122 123 private: 124 /// A variable or parameter storing global thread id for OpenMP 125 /// constructs. 126 const VarDecl *ThreadIDVar; 127 StringRef HelperName; 128 }; 129 130 /// API for captured statement code generation in OpenMP constructs. 131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { 132 public: 133 class UntiedTaskActionTy final : public PrePostActionTy { 134 bool Untied; 135 const VarDecl *PartIDVar; 136 const RegionCodeGenTy UntiedCodeGen; 137 llvm::SwitchInst *UntiedSwitch = nullptr; 138 139 public: 140 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, 141 const RegionCodeGenTy &UntiedCodeGen) 142 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} 143 void Enter(CodeGenFunction &CGF) override { 144 if (Untied) { 145 // Emit task switching point. 
146 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 147 CGF.GetAddrOfLocalVar(PartIDVar), 148 PartIDVar->getType()->castAs<PointerType>()); 149 llvm::Value *Res = 150 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); 151 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); 152 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); 153 CGF.EmitBlock(DoneBB); 154 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 155 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 156 UntiedSwitch->addCase(CGF.Builder.getInt32(0), 157 CGF.Builder.GetInsertBlock()); 158 emitUntiedSwitch(CGF); 159 } 160 } 161 void emitUntiedSwitch(CodeGenFunction &CGF) const { 162 if (Untied) { 163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( 164 CGF.GetAddrOfLocalVar(PartIDVar), 165 PartIDVar->getType()->castAs<PointerType>()); 166 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 167 PartIdLVal); 168 UntiedCodeGen(CGF); 169 CodeGenFunction::JumpDest CurPoint = 170 CGF.getJumpDestInCurrentScope(".untied.next."); 171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); 172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); 173 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), 174 CGF.Builder.GetInsertBlock()); 175 CGF.EmitBranchThroughCleanup(CurPoint); 176 CGF.EmitBlock(CurPoint.getBlock()); 177 } 178 } 179 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } 180 }; 181 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, 182 const VarDecl *ThreadIDVar, 183 const RegionCodeGenTy &CodeGen, 184 OpenMPDirectiveKind Kind, bool HasCancel, 185 const UntiedTaskActionTy &Action) 186 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), 187 ThreadIDVar(ThreadIDVar), Action(Action) { 188 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); 189 } 190 191 /// Get a variable or parameter for storing global thread id 192 /// inside OpenMP construct. 
193 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } 194 195 /// Get an LValue for the current ThreadID variable. 196 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; 197 198 /// Get the name of the capture helper. 199 StringRef getHelperName() const override { return ".omp_outlined."; } 200 201 void emitUntiedSwitch(CodeGenFunction &CGF) override { 202 Action.emitUntiedSwitch(CGF); 203 } 204 205 static bool classof(const CGCapturedStmtInfo *Info) { 206 return CGOpenMPRegionInfo::classof(Info) && 207 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == 208 TaskOutlinedRegion; 209 } 210 211 private: 212 /// A variable or parameter storing global thread id for OpenMP 213 /// constructs. 214 const VarDecl *ThreadIDVar; 215 /// Action for emitting code for untied tasks. 216 const UntiedTaskActionTy &Action; 217 }; 218 219 /// API for inlined captured statement code generation in OpenMP 220 /// constructs. 221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { 222 public: 223 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, 224 const RegionCodeGenTy &CodeGen, 225 OpenMPDirectiveKind Kind, bool HasCancel) 226 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), 227 OldCSI(OldCSI), 228 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} 229 230 // Retrieve the value of the context parameter. 231 llvm::Value *getContextValue() const override { 232 if (OuterRegionInfo) 233 return OuterRegionInfo->getContextValue(); 234 llvm_unreachable("No context value for inlined OpenMP region"); 235 } 236 237 void setContextValue(llvm::Value *V) override { 238 if (OuterRegionInfo) { 239 OuterRegionInfo->setContextValue(V); 240 return; 241 } 242 llvm_unreachable("No context value for inlined OpenMP region"); 243 } 244 245 /// Lookup the captured field decl for a variable. 
246 const FieldDecl *lookup(const VarDecl *VD) const override { 247 if (OuterRegionInfo) 248 return OuterRegionInfo->lookup(VD); 249 // If there is no outer outlined region,no need to lookup in a list of 250 // captured variables, we can use the original one. 251 return nullptr; 252 } 253 254 FieldDecl *getThisFieldDecl() const override { 255 if (OuterRegionInfo) 256 return OuterRegionInfo->getThisFieldDecl(); 257 return nullptr; 258 } 259 260 /// Get a variable or parameter for storing global thread id 261 /// inside OpenMP construct. 262 const VarDecl *getThreadIDVariable() const override { 263 if (OuterRegionInfo) 264 return OuterRegionInfo->getThreadIDVariable(); 265 return nullptr; 266 } 267 268 /// Get an LValue for the current ThreadID variable. 269 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { 270 if (OuterRegionInfo) 271 return OuterRegionInfo->getThreadIDVariableLValue(CGF); 272 llvm_unreachable("No LValue for inlined OpenMP construct"); 273 } 274 275 /// Get the name of the capture helper. 276 StringRef getHelperName() const override { 277 if (auto *OuterRegionInfo = getOldCSI()) 278 return OuterRegionInfo->getHelperName(); 279 llvm_unreachable("No helper name for inlined OpenMP construct"); 280 } 281 282 void emitUntiedSwitch(CodeGenFunction &CGF) override { 283 if (OuterRegionInfo) 284 OuterRegionInfo->emitUntiedSwitch(CGF); 285 } 286 287 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } 288 289 static bool classof(const CGCapturedStmtInfo *Info) { 290 return CGOpenMPRegionInfo::classof(Info) && 291 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; 292 } 293 294 ~CGOpenMPInlinedRegionInfo() override = default; 295 296 private: 297 /// CodeGen info about outer OpenMP region. 298 CodeGenFunction::CGCapturedStmtInfo *OldCSI; 299 CGOpenMPRegionInfo *OuterRegionInfo; 300 }; 301 302 /// API for captured statement code generation in OpenMP target 303 /// constructs. 
For this captures, implicit parameters are used instead of the 304 /// captured fields. The name of the target region has to be unique in a given 305 /// application so it is provided by the client, because only the client has 306 /// the information to generate that. 307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { 308 public: 309 CGOpenMPTargetRegionInfo(const CapturedStmt &CS, 310 const RegionCodeGenTy &CodeGen, StringRef HelperName) 311 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, 312 /*HasCancel=*/false), 313 HelperName(HelperName) {} 314 315 /// This is unused for target regions because each starts executing 316 /// with a single thread. 317 const VarDecl *getThreadIDVariable() const override { return nullptr; } 318 319 /// Get the name of the capture helper. 320 StringRef getHelperName() const override { return HelperName; } 321 322 static bool classof(const CGCapturedStmtInfo *Info) { 323 return CGOpenMPRegionInfo::classof(Info) && 324 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; 325 } 326 327 private: 328 StringRef HelperName; 329 }; 330 331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { 332 llvm_unreachable("No codegen for expressions"); 333 } 334 /// API for generation of expressions captured in a innermost OpenMP 335 /// region. 336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { 337 public: 338 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) 339 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, 340 OMPD_unknown, 341 /*HasCancel=*/false), 342 PrivScope(CGF) { 343 // Make sure the globals captured in the provided statement are local by 344 // using the privatization logic. We assume the same variable is not 345 // captured more than once. 
346 for (const auto &C : CS.captures()) { 347 if (!C.capturesVariable() && !C.capturesVariableByCopy()) 348 continue; 349 350 const VarDecl *VD = C.getCapturedVar(); 351 if (VD->isLocalVarDeclOrParm()) 352 continue; 353 354 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 355 /*RefersToEnclosingVariableOrCapture=*/false, 356 VD->getType().getNonReferenceType(), VK_LValue, 357 C.getLocation()); 358 PrivScope.addPrivate( 359 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); }); 360 } 361 (void)PrivScope.Privatize(); 362 } 363 364 /// Lookup the captured field decl for a variable. 365 const FieldDecl *lookup(const VarDecl *VD) const override { 366 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) 367 return FD; 368 return nullptr; 369 } 370 371 /// Emit the captured statement body. 372 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { 373 llvm_unreachable("No body for expressions"); 374 } 375 376 /// Get a variable or parameter for storing global thread id 377 /// inside OpenMP construct. 378 const VarDecl *getThreadIDVariable() const override { 379 llvm_unreachable("No thread id for expressions"); 380 } 381 382 /// Get the name of the capture helper. 383 StringRef getHelperName() const override { 384 llvm_unreachable("No helper name for expressions"); 385 } 386 387 static bool classof(const CGCapturedStmtInfo *Info) { return false; } 388 389 private: 390 /// Private scope to capture global variables. 391 CodeGenFunction::OMPPrivateScope PrivScope; 392 }; 393 394 /// RAII for emitting code of OpenMP constructs. 395 class InlinedOpenMPRegionRAII { 396 CodeGenFunction &CGF; 397 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; 398 FieldDecl *LambdaThisCaptureField = nullptr; 399 const CodeGen::CGBlockInfo *BlockInfo = nullptr; 400 401 public: 402 /// Constructs region for combined constructs. 403 /// \param CodeGen Code generation sequence for combined directives. 
Includes 404 /// a list of functions used for code generation of implicitly inlined 405 /// regions. 406 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, 407 OpenMPDirectiveKind Kind, bool HasCancel) 408 : CGF(CGF) { 409 // Start emission for the construct. 410 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( 411 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); 412 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 413 LambdaThisCaptureField = CGF.LambdaThisCaptureField; 414 CGF.LambdaThisCaptureField = nullptr; 415 BlockInfo = CGF.BlockInfo; 416 CGF.BlockInfo = nullptr; 417 } 418 419 ~InlinedOpenMPRegionRAII() { 420 // Restore original CapturedStmtInfo only if we're done with code emission. 421 auto *OldCSI = 422 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); 423 delete CGF.CapturedStmtInfo; 424 CGF.CapturedStmtInfo = OldCSI; 425 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); 426 CGF.LambdaThisCaptureField = LambdaThisCaptureField; 427 CGF.BlockInfo = BlockInfo; 428 } 429 }; 430 431 /// Values for bit flags used in the ident_t to describe the fields. 432 /// All enumeric elements are named and described in accordance with the code 433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h 434 enum OpenMPLocationFlags : unsigned { 435 /// Use trampoline for internal microtask. 436 OMP_IDENT_IMD = 0x01, 437 /// Use c-style ident structure. 438 OMP_IDENT_KMPC = 0x02, 439 /// Atomic reduction option for kmpc_reduce. 440 OMP_ATOMIC_REDUCE = 0x10, 441 /// Explicit 'barrier' directive. 442 OMP_IDENT_BARRIER_EXPL = 0x20, 443 /// Implicit barrier in code. 444 OMP_IDENT_BARRIER_IMPL = 0x40, 445 /// Implicit barrier in 'for' directive. 446 OMP_IDENT_BARRIER_IMPL_FOR = 0x40, 447 /// Implicit barrier in 'sections' directive. 448 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, 449 /// Implicit barrier in 'single' directive. 
450 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, 451 /// Call of __kmp_for_static_init for static loop. 452 OMP_IDENT_WORK_LOOP = 0x200, 453 /// Call of __kmp_for_static_init for sections. 454 OMP_IDENT_WORK_SECTIONS = 0x400, 455 /// Call of __kmp_for_static_init for distribute. 456 OMP_IDENT_WORK_DISTRIBUTE = 0x800, 457 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) 458 }; 459 460 namespace { 461 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); 462 /// Values for bit flags for marking which requires clauses have been used. 463 enum OpenMPOffloadingRequiresDirFlags : int64_t { 464 /// flag undefined. 465 OMP_REQ_UNDEFINED = 0x000, 466 /// no requires clause present. 467 OMP_REQ_NONE = 0x001, 468 /// reverse_offload clause. 469 OMP_REQ_REVERSE_OFFLOAD = 0x002, 470 /// unified_address clause. 471 OMP_REQ_UNIFIED_ADDRESS = 0x004, 472 /// unified_shared_memory clause. 473 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, 474 /// dynamic_allocators clause. 475 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, 476 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) 477 }; 478 479 enum OpenMPOffloadingReservedDeviceIDs { 480 /// Device ID if the device was not defined, runtime should get it 481 /// from environment variables in the spec. 482 OMP_DEVICEID_UNDEF = -1, 483 }; 484 } // anonymous namespace 485 486 /// Describes ident structure that describes a source location. 
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below follow the order of the fields in the structure
/// quoted above.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Default schedule: an alias of OMP_sch_static.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};

/// Identifiers for the runtime library entry points named in the comments
/// below. Each enumerator is OMPRTL_ followed by the name of the runtime
/// function, and its comment quotes that function's prototype.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  // Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  // global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_cancel_barrier,
  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_serialized_parallel,
  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  // Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  // int end_part);
  OMPRTL__kmpc_omp_taskyield,
  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  // Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  // kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  // size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  // kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  // new_task);
  OMPRTL__kmpc_omp_task,
  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  // kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  // *lck);
  OMPRTL__kmpc_reduce_nowait,
  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  // kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  // kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  OMPRTL__kmpc_omp_taskwait,
  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  // int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  // microtask, ...);
  OMPRTL__kmpc_fork_teams,
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  // num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_post,
  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  // *vec);
  OMPRTL__kmpc_doacross_wait,
  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  OMPRTL__kmpc_task_reduction_init,
  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  // size);
  OMPRTL__kmpc_push_target_tripcount,
  // Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target,
  // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_nowait,
  // Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  // Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  // Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  // Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  // Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  // Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  // Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  // Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  // *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
  // Call to int64_t __tgt_mapper_num_components(void *rt_mapper_handle);
  OMPRTL__tgt_mapper_num_components,
  // Call to void __tgt_push_mapper_component(void *rt_mapper_handle, void
  // *base, void *begin, int64_t size, int64_t type);
  OMPRTL__tgt_push_mapper_component,
};

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
764 class CleanupTy final : public EHScopeStack::Cleanup { 765 PrePostActionTy *Action; 766 767 public: 768 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} 769 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 770 if (!CGF.HaveInsertPoint()) 771 return; 772 Action->Exit(CGF); 773 } 774 }; 775 776 } // anonymous namespace 777 778 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { 779 CodeGenFunction::RunCleanupsScope Scope(CGF); 780 if (PrePostAction) { 781 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); 782 Callback(CodeGen, CGF, *PrePostAction); 783 } else { 784 PrePostActionTy Action; 785 Callback(CodeGen, CGF, Action); 786 } 787 } 788 789 /// Check if the combiner is a call to UDR combiner and if it is so return the 790 /// UDR decl used for reduction. 791 static const OMPDeclareReductionDecl * 792 getReductionInit(const Expr *ReductionOp) { 793 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) 794 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) 795 if (const auto *DRE = 796 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) 797 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) 798 return DRD; 799 return nullptr; 800 } 801 802 static void emitInitWithReductionInitializer(CodeGenFunction &CGF, 803 const OMPDeclareReductionDecl *DRD, 804 const Expr *InitOp, 805 Address Private, Address Original, 806 QualType Ty) { 807 if (DRD->getInitializer()) { 808 std::pair<llvm::Function *, llvm::Function *> Reduction = 809 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); 810 const auto *CE = cast<CallExpr>(InitOp); 811 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); 812 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); 813 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); 814 const auto *LHSDRE = 815 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); 816 const auto *RHSDRE = 817 
cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); 818 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 819 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), 820 [=]() { return Private; }); 821 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), 822 [=]() { return Original; }); 823 (void)PrivateScope.Privatize(); 824 RValue Func = RValue::get(Reduction.second); 825 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); 826 CGF.EmitIgnoredExpr(InitOp); 827 } else { 828 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); 829 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); 830 auto *GV = new llvm::GlobalVariable( 831 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, 832 llvm::GlobalValue::PrivateLinkage, Init, Name); 833 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); 834 RValue InitRVal; 835 switch (CGF.getEvaluationKind(Ty)) { 836 case TEK_Scalar: 837 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); 838 break; 839 case TEK_Complex: 840 InitRVal = 841 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); 842 break; 843 case TEK_Aggregate: 844 InitRVal = RValue::getAggregate(LV.getAddress()); 845 break; 846 } 847 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); 848 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); 849 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), 850 /*IsInitializer=*/false); 851 } 852 } 853 854 /// Emit initialization of arrays of complex types. 855 /// \param DestAddr Address of the array. 856 /// \param Type Type of array. 857 /// \param Init Initial expression of array. 858 /// \param SrcAddr Address of the original array. 859 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, 860 QualType Type, bool EmitDeclareReductionInit, 861 const Expr *Init, 862 const OMPDeclareReductionDecl *DRD, 863 Address SrcAddr = Address::invalid()) { 864 // Perform element-by-element initialization. 
865 QualType ElementTy; 866 867 // Drill down to the base element type on both arrays. 868 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 869 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); 870 DestAddr = 871 CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); 872 if (DRD) 873 SrcAddr = 874 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 875 876 llvm::Value *SrcBegin = nullptr; 877 if (DRD) 878 SrcBegin = SrcAddr.getPointer(); 879 llvm::Value *DestBegin = DestAddr.getPointer(); 880 // Cast from pointer to array type to pointer to single element. 881 llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); 882 // The basic structure here is a while-do loop. 883 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); 884 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); 885 llvm::Value *IsEmpty = 886 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); 887 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 888 889 // Enter the loop body, making that address the current address. 
890 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 891 CGF.EmitBlock(BodyBB); 892 893 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 894 895 llvm::PHINode *SrcElementPHI = nullptr; 896 Address SrcElementCurrent = Address::invalid(); 897 if (DRD) { 898 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, 899 "omp.arraycpy.srcElementPast"); 900 SrcElementPHI->addIncoming(SrcBegin, EntryBB); 901 SrcElementCurrent = 902 Address(SrcElementPHI, 903 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 904 } 905 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( 906 DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); 907 DestElementPHI->addIncoming(DestBegin, EntryBB); 908 Address DestElementCurrent = 909 Address(DestElementPHI, 910 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 911 912 // Emit copy. 913 { 914 CodeGenFunction::RunCleanupsScope InitScope(CGF); 915 if (EmitDeclareReductionInit) { 916 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, 917 SrcElementCurrent, ElementTy); 918 } else 919 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), 920 /*IsInitializer=*/false); 921 } 922 923 if (DRD) { 924 // Shift the address forward by one element. 925 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( 926 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 927 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); 928 } 929 930 // Shift the address forward by one element. 931 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( 932 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 933 // Check whether we've reached the end. 934 llvm::Value *Done = 935 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 936 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 937 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); 938 939 // Done. 
940 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 941 } 942 943 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { 944 return CGF.EmitOMPSharedLValue(E); 945 } 946 947 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, 948 const Expr *E) { 949 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) 950 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); 951 return LValue(); 952 } 953 954 void ReductionCodeGen::emitAggregateInitialization( 955 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 956 const OMPDeclareReductionDecl *DRD) { 957 // Emit VarDecl with copy init for arrays. 958 // Get the address of the original variable captured in current 959 // captured region. 960 const auto *PrivateVD = 961 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 962 bool EmitDeclareReductionInit = 963 DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); 964 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), 965 EmitDeclareReductionInit, 966 EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp 967 : PrivateVD->getInit(), 968 DRD, SharedLVal.getAddress()); 969 } 970 971 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, 972 ArrayRef<const Expr *> Privates, 973 ArrayRef<const Expr *> ReductionOps) { 974 ClausesData.reserve(Shareds.size()); 975 SharedAddresses.reserve(Shareds.size()); 976 Sizes.reserve(Shareds.size()); 977 BaseDecls.reserve(Shareds.size()); 978 auto IPriv = Privates.begin(); 979 auto IRed = ReductionOps.begin(); 980 for (const Expr *Ref : Shareds) { 981 ClausesData.emplace_back(Ref, *IPriv, *IRed); 982 std::advance(IPriv, 1); 983 std::advance(IRed, 1); 984 } 985 } 986 987 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) { 988 assert(SharedAddresses.size() == N && 989 "Number of generated lvalues must be exactly N."); 990 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); 991 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); 992 SharedAddresses.emplace_back(First, Second); 993 } 994 995 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { 996 const auto *PrivateVD = 997 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 998 QualType PrivateType = PrivateVD->getType(); 999 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); 1000 if (!PrivateType->isVariablyModifiedType()) { 1001 Sizes.emplace_back( 1002 CGF.getTypeSize( 1003 SharedAddresses[N].first.getType().getNonReferenceType()), 1004 nullptr); 1005 return; 1006 } 1007 llvm::Value *Size; 1008 llvm::Value *SizeInChars; 1009 auto *ElemType = 1010 cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType()) 1011 ->getElementType(); 1012 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); 1013 if (AsArraySection) { 1014 Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(), 1015 SharedAddresses[N].first.getPointer()); 1016 Size = CGF.Builder.CreateNUWAdd( 1017 Size, llvm::ConstantInt::get(Size->getType(), 
/*V=*/1)); 1018 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); 1019 } else { 1020 SizeInChars = CGF.getTypeSize( 1021 SharedAddresses[N].first.getType().getNonReferenceType()); 1022 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); 1023 } 1024 Sizes.emplace_back(SizeInChars, Size); 1025 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1026 CGF, 1027 cast<OpaqueValueExpr>( 1028 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1029 RValue::get(Size)); 1030 CGF.EmitVariablyModifiedType(PrivateType); 1031 } 1032 1033 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, 1034 llvm::Value *Size) { 1035 const auto *PrivateVD = 1036 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1037 QualType PrivateType = PrivateVD->getType(); 1038 if (!PrivateType->isVariablyModifiedType()) { 1039 assert(!Size && !Sizes[N].second && 1040 "Size should be nullptr for non-variably modified reduction " 1041 "items."); 1042 return; 1043 } 1044 CodeGenFunction::OpaqueValueMapping OpaqueMap( 1045 CGF, 1046 cast<OpaqueValueExpr>( 1047 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), 1048 RValue::get(Size)); 1049 CGF.EmitVariablyModifiedType(PrivateType); 1050 } 1051 1052 void ReductionCodeGen::emitInitialization( 1053 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, 1054 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { 1055 assert(SharedAddresses.size() > N && "No variable was generated"); 1056 const auto *PrivateVD = 1057 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1058 const OMPDeclareReductionDecl *DRD = 1059 getReductionInit(ClausesData[N].ReductionOp); 1060 QualType PrivateType = PrivateVD->getType(); 1061 PrivateAddr = CGF.Builder.CreateElementBitCast( 1062 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1063 QualType SharedType = SharedAddresses[N].first.getType(); 1064 SharedLVal = CGF.MakeAddrLValue( 1065 
CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(), 1066 CGF.ConvertTypeForMem(SharedType)), 1067 SharedType, SharedAddresses[N].first.getBaseInfo(), 1068 CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); 1069 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { 1070 emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); 1071 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { 1072 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, 1073 PrivateAddr, SharedLVal.getAddress(), 1074 SharedLVal.getType()); 1075 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && 1076 !CGF.isTrivialInitializer(PrivateVD->getInit())) { 1077 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, 1078 PrivateVD->getType().getQualifiers(), 1079 /*IsInitializer=*/false); 1080 } 1081 } 1082 1083 bool ReductionCodeGen::needCleanups(unsigned N) { 1084 const auto *PrivateVD = 1085 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1086 QualType PrivateType = PrivateVD->getType(); 1087 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1088 return DTorKind != QualType::DK_none; 1089 } 1090 1091 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, 1092 Address PrivateAddr) { 1093 const auto *PrivateVD = 1094 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); 1095 QualType PrivateType = PrivateVD->getType(); 1096 QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); 1097 if (needCleanups(N)) { 1098 PrivateAddr = CGF.Builder.CreateElementBitCast( 1099 PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); 1100 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); 1101 } 1102 } 1103 1104 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1105 LValue BaseLV) { 1106 BaseTy = BaseTy.getNonReferenceType(); 1107 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1108 
!CGF.getContext().hasSameType(BaseTy, ElTy)) { 1109 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { 1110 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); 1111 } else { 1112 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); 1113 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); 1114 } 1115 BaseTy = BaseTy->getPointeeType(); 1116 } 1117 return CGF.MakeAddrLValue( 1118 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(), 1119 CGF.ConvertTypeForMem(ElTy)), 1120 BaseLV.getType(), BaseLV.getBaseInfo(), 1121 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); 1122 } 1123 1124 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, 1125 llvm::Type *BaseLVType, CharUnits BaseLVAlignment, 1126 llvm::Value *Addr) { 1127 Address Tmp = Address::invalid(); 1128 Address TopTmp = Address::invalid(); 1129 Address MostTopTmp = Address::invalid(); 1130 BaseTy = BaseTy.getNonReferenceType(); 1131 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && 1132 !CGF.getContext().hasSameType(BaseTy, ElTy)) { 1133 Tmp = CGF.CreateMemTemp(BaseTy); 1134 if (TopTmp.isValid()) 1135 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); 1136 else 1137 MostTopTmp = Tmp; 1138 TopTmp = Tmp; 1139 BaseTy = BaseTy->getPointeeType(); 1140 } 1141 llvm::Type *Ty = BaseLVType; 1142 if (Tmp.isValid()) 1143 Ty = Tmp.getElementType(); 1144 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); 1145 if (Tmp.isValid()) { 1146 CGF.Builder.CreateStore(Addr, Tmp); 1147 return MostTopTmp; 1148 } 1149 return Address(Addr, BaseLVAlignment); 1150 } 1151 1152 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { 1153 const VarDecl *OrigVD = nullptr; 1154 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { 1155 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); 1156 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) 1157 Base = TempOASE->getBase()->IgnoreParenImpCasts(); 1158 
while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1159 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1160 DE = cast<DeclRefExpr>(Base); 1161 OrigVD = cast<VarDecl>(DE->getDecl()); 1162 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { 1163 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); 1164 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) 1165 Base = TempASE->getBase()->IgnoreParenImpCasts(); 1166 DE = cast<DeclRefExpr>(Base); 1167 OrigVD = cast<VarDecl>(DE->getDecl()); 1168 } 1169 return OrigVD; 1170 } 1171 1172 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, 1173 Address PrivateAddr) { 1174 const DeclRefExpr *DE; 1175 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { 1176 BaseDecls.emplace_back(OrigVD); 1177 LValue OriginalBaseLValue = CGF.EmitLValue(DE); 1178 LValue BaseLValue = 1179 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), 1180 OriginalBaseLValue); 1181 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( 1182 BaseLValue.getPointer(), SharedAddresses[N].first.getPointer()); 1183 llvm::Value *PrivatePointer = 1184 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 1185 PrivateAddr.getPointer(), 1186 SharedAddresses[N].first.getAddress().getType()); 1187 llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); 1188 return castToBase(CGF, OrigVD->getType(), 1189 SharedAddresses[N].first.getType(), 1190 OriginalBaseLValue.getAddress().getType(), 1191 OriginalBaseLValue.getAlignment(), Ptr); 1192 } 1193 BaseDecls.emplace_back( 1194 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); 1195 return PrivateAddr; 1196 } 1197 1198 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { 1199 const OMPDeclareReductionDecl *DRD = 1200 getReductionInit(ClausesData[N].ReductionOp); 1201 return DRD && DRD->getInitializer(); 1202 } 1203 1204 LValue 
CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { 1205 return CGF.EmitLoadOfPointerLValue( 1206 CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1207 getThreadIDVariable()->getType()->castAs<PointerType>()); 1208 } 1209 1210 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { 1211 if (!CGF.HaveInsertPoint()) 1212 return; 1213 // 1.2.2 OpenMP Language Terminology 1214 // Structured block - An executable statement with a single entry at the 1215 // top and a single exit at the bottom. 1216 // The point of exit cannot be a branch out of the structured block. 1217 // longjmp() and throw() must not violate the entry/exit criteria. 1218 CGF.EHStack.pushTerminate(); 1219 CodeGen(CGF); 1220 CGF.EHStack.popTerminate(); 1221 } 1222 1223 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( 1224 CodeGenFunction &CGF) { 1225 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), 1226 getThreadIDVariable()->getType(), 1227 AlignmentSource::Decl); 1228 } 1229 1230 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, 1231 QualType FieldTy) { 1232 auto *Field = FieldDecl::Create( 1233 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, 1234 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), 1235 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); 1236 Field->setAccess(AS_public); 1237 DC->addDecl(Field); 1238 return Field; 1239 } 1240 1241 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, 1242 StringRef Separator) 1243 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), 1244 OffloadEntriesInfoManager(CGM) { 1245 ASTContext &C = CGM.getContext(); 1246 RecordDecl *RD = C.buildImplicitRecord("ident_t"); 1247 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 1248 RD->startDefinition(); 1249 // reserved_1 1250 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1251 // flags 1252 addFieldToRecordDecl(C, RD, 
KmpInt32Ty); 1253 // reserved_2 1254 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1255 // reserved_3 1256 addFieldToRecordDecl(C, RD, KmpInt32Ty); 1257 // psource 1258 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 1259 RD->completeDefinition(); 1260 IdentQTy = C.getRecordType(RD); 1261 IdentTy = CGM.getTypes().ConvertRecordDeclType(RD); 1262 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); 1263 1264 loadOffloadInfoMetadata(); 1265 } 1266 1267 bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, 1268 const GlobalDecl &OldGD, 1269 llvm::GlobalValue *OrigAddr, 1270 bool IsForDefinition) { 1271 // Emit at least a definition for the aliasee if the the address of the 1272 // original function is requested. 1273 if (IsForDefinition || OrigAddr) 1274 (void)CGM.GetAddrOfGlobal(NewGD); 1275 StringRef NewMangledName = CGM.getMangledName(NewGD); 1276 llvm::GlobalValue *Addr = CGM.GetGlobalValue(NewMangledName); 1277 if (Addr && !Addr->isDeclaration()) { 1278 const auto *D = cast<FunctionDecl>(OldGD.getDecl()); 1279 const CGFunctionInfo &FI = CGM.getTypes().arrangeGlobalDeclaration(OldGD); 1280 llvm::Type *DeclTy = CGM.getTypes().GetFunctionType(FI); 1281 1282 // Create a reference to the named value. This ensures that it is emitted 1283 // if a deferred decl. 1284 llvm::GlobalValue::LinkageTypes LT = CGM.getFunctionLinkage(OldGD); 1285 1286 // Create the new alias itself, but don't set a name yet. 
1287 auto *GA = 1288 llvm::GlobalAlias::create(DeclTy, 0, LT, "", Addr, &CGM.getModule()); 1289 1290 if (OrigAddr) { 1291 assert(OrigAddr->isDeclaration() && "Expected declaration"); 1292 1293 GA->takeName(OrigAddr); 1294 OrigAddr->replaceAllUsesWith( 1295 llvm::ConstantExpr::getBitCast(GA, OrigAddr->getType())); 1296 OrigAddr->eraseFromParent(); 1297 } else { 1298 GA->setName(CGM.getMangledName(OldGD)); 1299 } 1300 1301 // Set attributes which are particular to an alias; this is a 1302 // specialization of the attributes which may be set on a global function. 1303 if (D->hasAttr<WeakAttr>() || D->hasAttr<WeakRefAttr>() || 1304 D->isWeakImported()) 1305 GA->setLinkage(llvm::Function::WeakAnyLinkage); 1306 1307 CGM.SetCommonAttributes(OldGD, GA); 1308 return true; 1309 } 1310 return false; 1311 } 1312 1313 void CGOpenMPRuntime::clear() { 1314 InternalVars.clear(); 1315 // Clean non-target variable declarations possibly used only in debug info. 1316 for (const auto &Data : EmittedNonTargetVariables) { 1317 if (!Data.getValue().pointsToAliveValue()) 1318 continue; 1319 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); 1320 if (!GV) 1321 continue; 1322 if (!GV->isDeclaration() || GV->getNumUses() > 0) 1323 continue; 1324 GV->eraseFromParent(); 1325 } 1326 // Emit aliases for the deferred aliasees. 1327 for (const auto &Pair : DeferredVariantFunction) { 1328 StringRef MangledName = CGM.getMangledName(Pair.second.second); 1329 llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); 1330 // If not able to emit alias, just emit original declaration. 
1331 (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, 1332 /*IsForDefinition=*/false); 1333 } 1334 } 1335 1336 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { 1337 SmallString<128> Buffer; 1338 llvm::raw_svector_ostream OS(Buffer); 1339 StringRef Sep = FirstSeparator; 1340 for (StringRef Part : Parts) { 1341 OS << Sep << Part; 1342 Sep = Separator; 1343 } 1344 return OS.str(); 1345 } 1346 1347 static llvm::Function * 1348 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, 1349 const Expr *CombinerInitializer, const VarDecl *In, 1350 const VarDecl *Out, bool IsCombiner) { 1351 // void .omp_combiner.(Ty *in, Ty *out); 1352 ASTContext &C = CGM.getContext(); 1353 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 1354 FunctionArgList Args; 1355 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), 1356 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1357 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), 1358 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); 1359 Args.push_back(&OmpOutParm); 1360 Args.push_back(&OmpInParm); 1361 const CGFunctionInfo &FnInfo = 1362 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 1363 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 1364 std::string Name = CGM.getOpenMPRuntime().getName( 1365 {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); 1366 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 1367 Name, &CGM.getModule()); 1368 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 1369 if (CGM.getLangOpts().Optimize) { 1370 Fn->removeFnAttr(llvm::Attribute::NoInline); 1371 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 1372 Fn->addFnAttr(llvm::Attribute::AlwaysInline); 1373 } 1374 CodeGenFunction CGF(CGM); 1375 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. 1376 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. 
1377 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), 1378 Out->getLocation()); 1379 CodeGenFunction::OMPPrivateScope Scope(CGF); 1380 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); 1381 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { 1382 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) 1383 .getAddress(); 1384 }); 1385 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); 1386 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { 1387 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) 1388 .getAddress(); 1389 }); 1390 (void)Scope.Privatize(); 1391 if (!IsCombiner && Out->hasInit() && 1392 !CGF.isTrivialInitializer(Out->getInit())) { 1393 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), 1394 Out->getType().getQualifiers(), 1395 /*IsInitializer=*/true); 1396 } 1397 if (CombinerInitializer) 1398 CGF.EmitIgnoredExpr(CombinerInitializer); 1399 Scope.ForceCleanup(); 1400 CGF.FinishFunction(); 1401 return Fn; 1402 } 1403 1404 void CGOpenMPRuntime::emitUserDefinedReduction( 1405 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { 1406 if (UDRMap.count(D) > 0) 1407 return; 1408 llvm::Function *Combiner = emitCombinerOrInitializer( 1409 CGM, D->getType(), D->getCombiner(), 1410 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), 1411 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), 1412 /*IsCombiner=*/true); 1413 llvm::Function *Initializer = nullptr; 1414 if (const Expr *Init = D->getInitializer()) { 1415 Initializer = emitCombinerOrInitializer( 1416 CGM, D->getType(), 1417 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? 
Init 1418 : nullptr, 1419 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), 1420 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), 1421 /*IsCombiner=*/false); 1422 } 1423 UDRMap.try_emplace(D, Combiner, Initializer); 1424 if (CGF) { 1425 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); 1426 Decls.second.push_back(D); 1427 } 1428 } 1429 1430 std::pair<llvm::Function *, llvm::Function *> 1431 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { 1432 auto I = UDRMap.find(D); 1433 if (I != UDRMap.end()) 1434 return I->second; 1435 emitUserDefinedReduction(/*CGF=*/nullptr, D); 1436 return UDRMap.lookup(D); 1437 } 1438 1439 static llvm::Function *emitParallelOrTeamsOutlinedFunction( 1440 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, 1441 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, 1442 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { 1443 assert(ThreadIDVar->getType()->isPointerType() && 1444 "thread id variable must be of type kmp_int32 *"); 1445 CodeGenFunction CGF(CGM, true); 1446 bool HasCancel = false; 1447 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) 1448 HasCancel = OPD->hasCancel(); 1449 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) 1450 HasCancel = OPSD->hasCancel(); 1451 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) 1452 HasCancel = OPFD->hasCancel(); 1453 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) 1454 HasCancel = OPFD->hasCancel(); 1455 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) 1456 HasCancel = OPFD->hasCancel(); 1457 else if (const auto *OPFD = 1458 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) 1459 HasCancel = OPFD->hasCancel(); 1460 else if (const auto *OPFD = 1461 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) 1462 HasCancel = OPFD->hasCancel(); 1463 CGOpenMPOutlinedRegionInfo 
CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, 1464 HasCancel, OutlinedHelperName); 1465 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1466 return CGF.GenerateOpenMPCapturedStmtFunction(*CS); 1467 } 1468 1469 llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( 1470 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1471 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1472 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); 1473 return emitParallelOrTeamsOutlinedFunction( 1474 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1475 } 1476 1477 llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( 1478 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1479 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 1480 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); 1481 return emitParallelOrTeamsOutlinedFunction( 1482 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); 1483 } 1484 1485 llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( 1486 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 1487 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 1488 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 1489 bool Tied, unsigned &NumberOfParts) { 1490 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, 1491 PrePostActionTy &) { 1492 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); 1493 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); 1494 llvm::Value *TaskArgs[] = { 1495 UpLoc, ThreadID, 1496 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), 1497 TaskTVar->getType()->castAs<PointerType>()) 1498 .getPointer()}; 1499 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); 1500 }; 1501 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, 1502 UntiedCodeGen); 1503 CodeGen.setAction(Action); 1504 
assert(!ThreadIDVar->getType()->isPointerType() && 1505 "thread id variable must be of type kmp_int32 for tasks"); 1506 const OpenMPDirectiveKind Region = 1507 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop 1508 : OMPD_task; 1509 const CapturedStmt *CS = D.getCapturedStmt(Region); 1510 const auto *TD = dyn_cast<OMPTaskDirective>(&D); 1511 CodeGenFunction CGF(CGM, true); 1512 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, 1513 InnermostKind, 1514 TD ? TD->hasCancel() : false, Action); 1515 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); 1516 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); 1517 if (!Tied) 1518 NumberOfParts = Action.getNumberOfParts(); 1519 return Res; 1520 } 1521 1522 static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, 1523 const RecordDecl *RD, const CGRecordLayout &RL, 1524 ArrayRef<llvm::Constant *> Data) { 1525 llvm::StructType *StructTy = RL.getLLVMType(); 1526 unsigned PrevIdx = 0; 1527 ConstantInitBuilder CIBuilder(CGM); 1528 auto DI = Data.begin(); 1529 for (const FieldDecl *FD : RD->fields()) { 1530 unsigned Idx = RL.getLLVMFieldNo(FD); 1531 // Fill the alignment. 1532 for (unsigned I = PrevIdx; I < Idx; ++I) 1533 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); 1534 PrevIdx = Idx + 1; 1535 Fields.add(*DI); 1536 ++DI; 1537 } 1538 } 1539 1540 template <class... As> 1541 static llvm::GlobalVariable * 1542 createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, 1543 ArrayRef<llvm::Constant *> Data, const Twine &Name, 1544 As &&... 
Args) { 1545 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1546 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1547 ConstantInitBuilder CIBuilder(CGM); 1548 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); 1549 buildStructValue(Fields, CGM, RD, RL, Data); 1550 return Fields.finishAndCreateGlobal( 1551 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, 1552 std::forward<As>(Args)...); 1553 } 1554 1555 template <typename T> 1556 static void 1557 createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, 1558 ArrayRef<llvm::Constant *> Data, 1559 T &Parent) { 1560 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); 1561 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); 1562 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); 1563 buildStructValue(Fields, CGM, RD, RL, Data); 1564 Fields.finishAndAddTo(Parent); 1565 } 1566 1567 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { 1568 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1569 unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); 1570 FlagsTy FlagsKey(Flags, Reserved2Flags); 1571 llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey); 1572 if (!Entry) { 1573 if (!DefaultOpenMPPSource) { 1574 // Initialize default location for psource field of ident_t structure of 1575 // all ident_t objects. Format is ";file;function;line;column;;". 
1576 // Taken from 1577 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp 1578 DefaultOpenMPPSource = 1579 CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer(); 1580 DefaultOpenMPPSource = 1581 llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy); 1582 } 1583 1584 llvm::Constant *Data[] = { 1585 llvm::ConstantInt::getNullValue(CGM.Int32Ty), 1586 llvm::ConstantInt::get(CGM.Int32Ty, Flags), 1587 llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags), 1588 llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource}; 1589 llvm::GlobalValue *DefaultOpenMPLocation = 1590 createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "", 1591 llvm::GlobalValue::PrivateLinkage); 1592 DefaultOpenMPLocation->setUnnamedAddr( 1593 llvm::GlobalValue::UnnamedAddr::Global); 1594 1595 OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation; 1596 } 1597 return Address(Entry, Align); 1598 } 1599 1600 void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, 1601 bool AtCurrentPoint) { 1602 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1603 assert(!Elem.second.ServiceInsertPt && "Insert point is set already."); 1604 1605 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); 1606 if (AtCurrentPoint) { 1607 Elem.second.ServiceInsertPt = new llvm::BitCastInst( 1608 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); 1609 } else { 1610 Elem.second.ServiceInsertPt = 1611 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); 1612 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); 1613 } 1614 } 1615 1616 void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { 1617 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1618 if (Elem.second.ServiceInsertPt) { 1619 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; 1620 Elem.second.ServiceInsertPt = nullptr; 1621 Ptr->eraseFromParent(); 1622 } 1623 } 1624 1625 llvm::Value 
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, 1626 SourceLocation Loc, 1627 unsigned Flags) { 1628 Flags |= OMP_IDENT_KMPC; 1629 // If no debug info is generated - return global default location. 1630 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || 1631 Loc.isInvalid()) 1632 return getOrCreateDefaultLocation(Flags).getPointer(); 1633 1634 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1635 1636 CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy); 1637 Address LocValue = Address::invalid(); 1638 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1639 if (I != OpenMPLocThreadIDMap.end()) 1640 LocValue = Address(I->second.DebugLoc, Align); 1641 1642 // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if 1643 // GetOpenMPThreadID was called before this routine. 1644 if (!LocValue.isValid()) { 1645 // Generate "ident_t .kmpc_loc.addr;" 1646 Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr"); 1647 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1648 Elem.second.DebugLoc = AI.getPointer(); 1649 LocValue = AI; 1650 1651 if (!Elem.second.ServiceInsertPt) 1652 setLocThreadIdInsertPt(CGF); 1653 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1654 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1655 CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags), 1656 CGF.getTypeSize(IdentQTy)); 1657 } 1658 1659 // char **psource = &.kmpc_loc_<flags>.addr.psource; 1660 LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy); 1661 auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin(); 1662 LValue PSource = 1663 CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource)); 1664 1665 llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding()); 1666 if (OMPDebugLoc == nullptr) { 1667 SmallString<128> Buffer2; 1668 llvm::raw_svector_ostream OS2(Buffer2); 1669 // Build debug location 1670 PresumedLoc PLoc = 
CGF.getContext().getSourceManager().getPresumedLoc(Loc); 1671 OS2 << ";" << PLoc.getFilename() << ";"; 1672 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) 1673 OS2 << FD->getQualifiedNameAsString(); 1674 OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; 1675 OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str()); 1676 OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc; 1677 } 1678 // *psource = ";<File>;<Function>;<Line>;<Column>;;"; 1679 CGF.EmitStoreOfScalar(OMPDebugLoc, PSource); 1680 1681 // Our callers always pass this to a runtime function, so for 1682 // convenience, go ahead and return a naked pointer. 1683 return LocValue.getPointer(); 1684 } 1685 1686 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, 1687 SourceLocation Loc) { 1688 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1689 1690 llvm::Value *ThreadID = nullptr; 1691 // Check whether we've already cached a load of the thread id in this 1692 // function. 1693 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); 1694 if (I != OpenMPLocThreadIDMap.end()) { 1695 ThreadID = I->second.ThreadID; 1696 if (ThreadID != nullptr) 1697 return ThreadID; 1698 } 1699 // If exceptions are enabled, do not use parameter to avoid possible crash. 1700 if (auto *OMPRegionInfo = 1701 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 1702 if (OMPRegionInfo->getThreadIDVariable()) { 1703 // Check if this an outlined function with thread id passed as argument. 
1704 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); 1705 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); 1706 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || 1707 !CGF.getLangOpts().CXXExceptions || 1708 CGF.Builder.GetInsertBlock() == TopBlock || 1709 !isa<llvm::Instruction>(LVal.getPointer()) || 1710 cast<llvm::Instruction>(LVal.getPointer())->getParent() == TopBlock || 1711 cast<llvm::Instruction>(LVal.getPointer())->getParent() == 1712 CGF.Builder.GetInsertBlock()) { 1713 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); 1714 // If value loaded in entry block, cache it and use it everywhere in 1715 // function. 1716 if (CGF.Builder.GetInsertBlock() == TopBlock) { 1717 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1718 Elem.second.ThreadID = ThreadID; 1719 } 1720 return ThreadID; 1721 } 1722 } 1723 } 1724 1725 // This is not an outlined function region - need to call __kmpc_int32 1726 // kmpc_global_thread_num(ident_t *loc). 1727 // Generate thread id value and cache this value for use across the 1728 // function. 
1729 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); 1730 if (!Elem.second.ServiceInsertPt) 1731 setLocThreadIdInsertPt(CGF); 1732 CGBuilderTy::InsertPointGuard IPG(CGF.Builder); 1733 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); 1734 llvm::CallInst *Call = CGF.Builder.CreateCall( 1735 createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 1736 emitUpdateLocation(CGF, Loc)); 1737 Call->setCallingConv(CGF.getRuntimeCC()); 1738 Elem.second.ThreadID = Call; 1739 return Call; 1740 } 1741 1742 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { 1743 assert(CGF.CurFn && "No function in current CodeGenFunction."); 1744 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { 1745 clearLocThreadIdInsertPt(CGF); 1746 OpenMPLocThreadIDMap.erase(CGF.CurFn); 1747 } 1748 if (FunctionUDRMap.count(CGF.CurFn) > 0) { 1749 for(auto *D : FunctionUDRMap[CGF.CurFn]) 1750 UDRMap.erase(D); 1751 FunctionUDRMap.erase(CGF.CurFn); 1752 } 1753 auto I = FunctionUDMMap.find(CGF.CurFn); 1754 if (I != FunctionUDMMap.end()) { 1755 for(auto *D : I->second) 1756 UDMMap.erase(D); 1757 FunctionUDMMap.erase(I); 1758 } 1759 } 1760 1761 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { 1762 return IdentTy->getPointerTo(); 1763 } 1764 1765 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { 1766 if (!Kmpc_MicroTy) { 1767 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) 
1768 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), 1769 llvm::PointerType::getUnqual(CGM.Int32Ty)}; 1770 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); 1771 } 1772 return llvm::PointerType::getUnqual(Kmpc_MicroTy); 1773 } 1774 1775 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { 1776 llvm::FunctionCallee RTLFn = nullptr; 1777 switch (static_cast<OpenMPRTLFunction>(Function)) { 1778 case OMPRTL__kmpc_fork_call: { 1779 // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro 1780 // microtask, ...); 1781 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1782 getKmpc_MicroPointerTy()}; 1783 auto *FnTy = 1784 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 1785 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); 1786 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 1787 if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 1788 llvm::LLVMContext &Ctx = F->getContext(); 1789 llvm::MDBuilder MDB(Ctx); 1790 // Annotate the callback behavior of the __kmpc_fork_call: 1791 // - The callback callee is argument number 2 (microtask). 1792 // - The first two arguments of the callback callee are unknown (-1). 1793 // - All variadic arguments to the __kmpc_fork_call are passed to the 1794 // callback callee. 
1795 F->addMetadata( 1796 llvm::LLVMContext::MD_callback, 1797 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 1798 2, {-1, -1}, 1799 /* VarArgsArePassed */ true)})); 1800 } 1801 } 1802 break; 1803 } 1804 case OMPRTL__kmpc_global_thread_num: { 1805 // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); 1806 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1807 auto *FnTy = 1808 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1809 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); 1810 break; 1811 } 1812 case OMPRTL__kmpc_threadprivate_cached: { 1813 // Build void *__kmpc_threadprivate_cached(ident_t *loc, 1814 // kmp_int32 global_tid, void *data, size_t size, void ***cache); 1815 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1816 CGM.VoidPtrTy, CGM.SizeTy, 1817 CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; 1818 auto *FnTy = 1819 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); 1820 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); 1821 break; 1822 } 1823 case OMPRTL__kmpc_critical: { 1824 // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, 1825 // kmp_critical_name *crit); 1826 llvm::Type *TypeParams[] = { 1827 getIdentTyPointerTy(), CGM.Int32Ty, 1828 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1829 auto *FnTy = 1830 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1831 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); 1832 break; 1833 } 1834 case OMPRTL__kmpc_critical_with_hint: { 1835 // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, 1836 // kmp_critical_name *crit, uintptr_t hint); 1837 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1838 llvm::PointerType::getUnqual(KmpCriticalNameTy), 1839 CGM.IntPtrTy}; 1840 auto *FnTy = 1841 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1842 RTLFn = CGM.CreateRuntimeFunction(FnTy, 
"__kmpc_critical_with_hint"); 1843 break; 1844 } 1845 case OMPRTL__kmpc_threadprivate_register: { 1846 // Build void __kmpc_threadprivate_register(ident_t *, void *data, 1847 // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); 1848 // typedef void *(*kmpc_ctor)(void *); 1849 auto *KmpcCtorTy = 1850 llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 1851 /*isVarArg*/ false)->getPointerTo(); 1852 // typedef void *(*kmpc_cctor)(void *, void *); 1853 llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 1854 auto *KmpcCopyCtorTy = 1855 llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, 1856 /*isVarArg*/ false) 1857 ->getPointerTo(); 1858 // typedef void (*kmpc_dtor)(void *); 1859 auto *KmpcDtorTy = 1860 llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) 1861 ->getPointerTo(); 1862 llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, 1863 KmpcCopyCtorTy, KmpcDtorTy}; 1864 auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, 1865 /*isVarArg*/ false); 1866 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); 1867 break; 1868 } 1869 case OMPRTL__kmpc_end_critical: { 1870 // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, 1871 // kmp_critical_name *crit); 1872 llvm::Type *TypeParams[] = { 1873 getIdentTyPointerTy(), CGM.Int32Ty, 1874 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 1875 auto *FnTy = 1876 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1877 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical"); 1878 break; 1879 } 1880 case OMPRTL__kmpc_cancel_barrier: { 1881 // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 1882 // global_tid); 1883 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1884 auto *FnTy = 1885 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 1886 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier"); 1887 break; 1888 } 1889 case 
OMPRTL__kmpc_barrier: { 1890 // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); 1891 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1892 auto *FnTy = 1893 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1894 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); 1895 break; 1896 } 1897 case OMPRTL__kmpc_for_static_fini: { 1898 // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); 1899 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1900 auto *FnTy = 1901 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1902 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini"); 1903 break; 1904 } 1905 case OMPRTL__kmpc_push_num_threads: { 1906 // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, 1907 // kmp_int32 num_threads) 1908 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 1909 CGM.Int32Ty}; 1910 auto *FnTy = 1911 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1912 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads"); 1913 break; 1914 } 1915 case OMPRTL__kmpc_serialized_parallel: { 1916 // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 1917 // global_tid); 1918 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1919 auto *FnTy = 1920 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1921 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel"); 1922 break; 1923 } 1924 case OMPRTL__kmpc_end_serialized_parallel: { 1925 // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 1926 // global_tid); 1927 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1928 auto *FnTy = 1929 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1930 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel"); 1931 break; 1932 } 1933 case OMPRTL__kmpc_flush: { 1934 // Build void 
__kmpc_flush(ident_t *loc); 1935 llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; 1936 auto *FnTy = 1937 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 1938 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush"); 1939 break; 1940 } 1941 case OMPRTL__kmpc_master: { 1942 // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid); 1943 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1944 auto *FnTy = 1945 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1946 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master"); 1947 break; 1948 } 1949 case OMPRTL__kmpc_end_master: { 1950 // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid); 1951 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1952 auto *FnTy = 1953 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1954 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master"); 1955 break; 1956 } 1957 case OMPRTL__kmpc_omp_taskyield: { 1958 // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid, 1959 // int end_part); 1960 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 1961 auto *FnTy = 1962 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1963 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield"); 1964 break; 1965 } 1966 case OMPRTL__kmpc_single: { 1967 // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid); 1968 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1969 auto *FnTy = 1970 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 1971 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single"); 1972 break; 1973 } 1974 case OMPRTL__kmpc_end_single: { 1975 // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid); 1976 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 1977 auto *FnTy = 1978 
llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 1979 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single"); 1980 break; 1981 } 1982 case OMPRTL__kmpc_omp_task_alloc: { 1983 // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 1984 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1985 // kmp_routine_entry_t *task_entry); 1986 assert(KmpRoutineEntryPtrTy != nullptr && 1987 "Type kmp_routine_entry_t must be created."); 1988 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 1989 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy}; 1990 // Return void * and then cast to particular kmp_task_t type. 1991 auto *FnTy = 1992 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 1993 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc"); 1994 break; 1995 } 1996 case OMPRTL__kmpc_omp_target_task_alloc: { 1997 // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid, 1998 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 1999 // kmp_routine_entry_t *task_entry, kmp_int64 device_id); 2000 assert(KmpRoutineEntryPtrTy != nullptr && 2001 "Type kmp_routine_entry_t must be created."); 2002 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2003 CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy, 2004 CGM.Int64Ty}; 2005 // Return void * and then cast to particular kmp_task_t type. 
2006 auto *FnTy = 2007 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2008 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc"); 2009 break; 2010 } 2011 case OMPRTL__kmpc_omp_task: { 2012 // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2013 // *new_task); 2014 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2015 CGM.VoidPtrTy}; 2016 auto *FnTy = 2017 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2018 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); 2019 break; 2020 } 2021 case OMPRTL__kmpc_copyprivate: { 2022 // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, 2023 // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), 2024 // kmp_int32 didit); 2025 llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2026 auto *CpyFnTy = 2027 llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); 2028 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy, 2029 CGM.VoidPtrTy, CpyFnTy->getPointerTo(), 2030 CGM.Int32Ty}; 2031 auto *FnTy = 2032 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2033 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); 2034 break; 2035 } 2036 case OMPRTL__kmpc_reduce: { 2037 // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, 2038 // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void 2039 // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); 2040 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2041 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2042 /*isVarArg=*/false); 2043 llvm::Type *TypeParams[] = { 2044 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2045 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2046 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2047 auto *FnTy = 2048 
llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2049 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); 2050 break; 2051 } 2052 case OMPRTL__kmpc_reduce_nowait: { 2053 // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 2054 // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 2055 // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name 2056 // *lck); 2057 llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2058 auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, 2059 /*isVarArg=*/false); 2060 llvm::Type *TypeParams[] = { 2061 getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, 2062 CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), 2063 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2064 auto *FnTy = 2065 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2066 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); 2067 break; 2068 } 2069 case OMPRTL__kmpc_end_reduce: { 2070 // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, 2071 // kmp_critical_name *lck); 2072 llvm::Type *TypeParams[] = { 2073 getIdentTyPointerTy(), CGM.Int32Ty, 2074 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2075 auto *FnTy = 2076 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2077 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); 2078 break; 2079 } 2080 case OMPRTL__kmpc_end_reduce_nowait: { 2081 // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, 2082 // kmp_critical_name *lck); 2083 llvm::Type *TypeParams[] = { 2084 getIdentTyPointerTy(), CGM.Int32Ty, 2085 llvm::PointerType::getUnqual(KmpCriticalNameTy)}; 2086 auto *FnTy = 2087 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2088 RTLFn = 2089 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); 2090 break; 2091 } 2092 case OMPRTL__kmpc_omp_task_begin_if0: { 
2093 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2094 // *new_task); 2095 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2096 CGM.VoidPtrTy}; 2097 auto *FnTy = 2098 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2099 RTLFn = 2100 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0"); 2101 break; 2102 } 2103 case OMPRTL__kmpc_omp_task_complete_if0: { 2104 // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t 2105 // *new_task); 2106 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2107 CGM.VoidPtrTy}; 2108 auto *FnTy = 2109 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2110 RTLFn = CGM.CreateRuntimeFunction(FnTy, 2111 /*Name=*/"__kmpc_omp_task_complete_if0"); 2112 break; 2113 } 2114 case OMPRTL__kmpc_ordered: { 2115 // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid); 2116 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2117 auto *FnTy = 2118 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2119 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered"); 2120 break; 2121 } 2122 case OMPRTL__kmpc_end_ordered: { 2123 // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid); 2124 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2125 auto *FnTy = 2126 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2127 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered"); 2128 break; 2129 } 2130 case OMPRTL__kmpc_omp_taskwait: { 2131 // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid); 2132 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2133 auto *FnTy = 2134 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2135 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait"); 2136 break; 2137 } 2138 case OMPRTL__kmpc_taskgroup: { 2139 // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 
global_tid); 2140 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2141 auto *FnTy = 2142 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2143 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup"); 2144 break; 2145 } 2146 case OMPRTL__kmpc_end_taskgroup: { 2147 // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid); 2148 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2149 auto *FnTy = 2150 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2151 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup"); 2152 break; 2153 } 2154 case OMPRTL__kmpc_push_proc_bind: { 2155 // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, 2156 // int proc_bind) 2157 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2158 auto *FnTy = 2159 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2160 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind"); 2161 break; 2162 } 2163 case OMPRTL__kmpc_omp_task_with_deps: { 2164 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 2165 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 2166 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list); 2167 llvm::Type *TypeParams[] = { 2168 getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty, 2169 CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy}; 2170 auto *FnTy = 2171 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); 2172 RTLFn = 2173 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps"); 2174 break; 2175 } 2176 case OMPRTL__kmpc_omp_wait_deps: { 2177 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 2178 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, 2179 // kmp_depend_info_t *noalias_dep_list); 2180 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2181 CGM.Int32Ty, CGM.VoidPtrTy, 2182 
CGM.Int32Ty, CGM.VoidPtrTy}; 2183 auto *FnTy = 2184 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2185 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps"); 2186 break; 2187 } 2188 case OMPRTL__kmpc_cancellationpoint: { 2189 // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32 2190 // global_tid, kmp_int32 cncl_kind) 2191 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2192 auto *FnTy = 2193 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2194 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint"); 2195 break; 2196 } 2197 case OMPRTL__kmpc_cancel: { 2198 // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid, 2199 // kmp_int32 cncl_kind) 2200 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy}; 2201 auto *FnTy = 2202 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2203 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel"); 2204 break; 2205 } 2206 case OMPRTL__kmpc_push_num_teams: { 2207 // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid, 2208 // kmp_int32 num_teams, kmp_int32 num_threads) 2209 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, 2210 CGM.Int32Ty}; 2211 auto *FnTy = 2212 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2213 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams"); 2214 break; 2215 } 2216 case OMPRTL__kmpc_fork_teams: { 2217 // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro 2218 // microtask, ...); 2219 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2220 getKmpc_MicroPointerTy()}; 2221 auto *FnTy = 2222 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); 2223 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); 2224 if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) { 2225 if 
(!F->hasMetadata(llvm::LLVMContext::MD_callback)) { 2226 llvm::LLVMContext &Ctx = F->getContext(); 2227 llvm::MDBuilder MDB(Ctx); 2228 // Annotate the callback behavior of the __kmpc_fork_teams: 2229 // - The callback callee is argument number 2 (microtask). 2230 // - The first two arguments of the callback callee are unknown (-1). 2231 // - All variadic arguments to the __kmpc_fork_teams are passed to the 2232 // callback callee. 2233 F->addMetadata( 2234 llvm::LLVMContext::MD_callback, 2235 *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( 2236 2, {-1, -1}, 2237 /* VarArgsArePassed */ true)})); 2238 } 2239 } 2240 break; 2241 } 2242 case OMPRTL__kmpc_taskloop: { 2243 // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 2244 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 2245 // sched, kmp_uint64 grainsize, void *task_dup); 2246 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2247 CGM.IntTy, 2248 CGM.VoidPtrTy, 2249 CGM.IntTy, 2250 CGM.Int64Ty->getPointerTo(), 2251 CGM.Int64Ty->getPointerTo(), 2252 CGM.Int64Ty, 2253 CGM.IntTy, 2254 CGM.IntTy, 2255 CGM.Int64Ty, 2256 CGM.VoidPtrTy}; 2257 auto *FnTy = 2258 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2259 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); 2260 break; 2261 } 2262 case OMPRTL__kmpc_doacross_init: { 2263 // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 2264 // num_dims, struct kmp_dim *dims); 2265 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), 2266 CGM.Int32Ty, 2267 CGM.Int32Ty, 2268 CGM.VoidPtrTy}; 2269 auto *FnTy = 2270 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2271 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); 2272 break; 2273 } 2274 case OMPRTL__kmpc_doacross_fini: { 2275 // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); 2276 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; 2277 auto *FnTy 
= 2278 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2279 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); 2280 break; 2281 } 2282 case OMPRTL__kmpc_doacross_post: { 2283 // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 2284 // *vec); 2285 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2286 CGM.Int64Ty->getPointerTo()}; 2287 auto *FnTy = 2288 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2289 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); 2290 break; 2291 } 2292 case OMPRTL__kmpc_doacross_wait: { 2293 // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 2294 // *vec); 2295 llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, 2296 CGM.Int64Ty->getPointerTo()}; 2297 auto *FnTy = 2298 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2299 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); 2300 break; 2301 } 2302 case OMPRTL__kmpc_task_reduction_init: { 2303 // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void 2304 // *data); 2305 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy}; 2306 auto *FnTy = 2307 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2308 RTLFn = 2309 CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init"); 2310 break; 2311 } 2312 case OMPRTL__kmpc_task_reduction_get_th_data: { 2313 // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void 2314 // *d); 2315 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2316 auto *FnTy = 2317 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2318 RTLFn = CGM.CreateRuntimeFunction( 2319 FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data"); 2320 break; 2321 } 2322 case OMPRTL__kmpc_alloc: { 2323 // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t 2324 // 
al); omp_allocator_handle_t type is void *. 2325 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy}; 2326 auto *FnTy = 2327 llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false); 2328 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc"); 2329 break; 2330 } 2331 case OMPRTL__kmpc_free: { 2332 // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t 2333 // al); omp_allocator_handle_t type is void *. 2334 llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy}; 2335 auto *FnTy = 2336 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2337 RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free"); 2338 break; 2339 } 2340 case OMPRTL__kmpc_push_target_tripcount: { 2341 // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64 2342 // size); 2343 llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty}; 2344 llvm::FunctionType *FnTy = 2345 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2346 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount"); 2347 break; 2348 } 2349 case OMPRTL__tgt_target: { 2350 // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t 2351 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2352 // *arg_types); 2353 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2354 CGM.VoidPtrTy, 2355 CGM.Int32Ty, 2356 CGM.VoidPtrPtrTy, 2357 CGM.VoidPtrPtrTy, 2358 CGM.Int64Ty->getPointerTo(), 2359 CGM.Int64Ty->getPointerTo()}; 2360 auto *FnTy = 2361 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2362 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); 2363 break; 2364 } 2365 case OMPRTL__tgt_target_nowait: { 2366 // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr, 2367 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2368 // int64_t *arg_types); 2369 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2370 CGM.VoidPtrTy, 2371 
CGM.Int32Ty, 2372 CGM.VoidPtrPtrTy, 2373 CGM.VoidPtrPtrTy, 2374 CGM.Int64Ty->getPointerTo(), 2375 CGM.Int64Ty->getPointerTo()}; 2376 auto *FnTy = 2377 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2378 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait"); 2379 break; 2380 } 2381 case OMPRTL__tgt_target_teams: { 2382 // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr, 2383 // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, 2384 // int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2385 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2386 CGM.VoidPtrTy, 2387 CGM.Int32Ty, 2388 CGM.VoidPtrPtrTy, 2389 CGM.VoidPtrPtrTy, 2390 CGM.Int64Ty->getPointerTo(), 2391 CGM.Int64Ty->getPointerTo(), 2392 CGM.Int32Ty, 2393 CGM.Int32Ty}; 2394 auto *FnTy = 2395 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2396 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); 2397 break; 2398 } 2399 case OMPRTL__tgt_target_teams_nowait: { 2400 // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void 2401 // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t 2402 // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit); 2403 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2404 CGM.VoidPtrTy, 2405 CGM.Int32Ty, 2406 CGM.VoidPtrPtrTy, 2407 CGM.VoidPtrPtrTy, 2408 CGM.Int64Ty->getPointerTo(), 2409 CGM.Int64Ty->getPointerTo(), 2410 CGM.Int32Ty, 2411 CGM.Int32Ty}; 2412 auto *FnTy = 2413 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2414 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait"); 2415 break; 2416 } 2417 case OMPRTL__tgt_register_requires: { 2418 // Build void __tgt_register_requires(int64_t flags); 2419 llvm::Type *TypeParams[] = {CGM.Int64Ty}; 2420 auto *FnTy = 2421 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2422 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires"); 2423 
break; 2424 } 2425 case OMPRTL__tgt_register_lib: { 2426 // Build void __tgt_register_lib(__tgt_bin_desc *desc); 2427 QualType ParamTy = 2428 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2429 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2430 auto *FnTy = 2431 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2432 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib"); 2433 break; 2434 } 2435 case OMPRTL__tgt_unregister_lib: { 2436 // Build void __tgt_unregister_lib(__tgt_bin_desc *desc); 2437 QualType ParamTy = 2438 CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy()); 2439 llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)}; 2440 auto *FnTy = 2441 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2442 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib"); 2443 break; 2444 } 2445 case OMPRTL__tgt_target_data_begin: { 2446 // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, 2447 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2448 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2449 CGM.Int32Ty, 2450 CGM.VoidPtrPtrTy, 2451 CGM.VoidPtrPtrTy, 2452 CGM.Int64Ty->getPointerTo(), 2453 CGM.Int64Ty->getPointerTo()}; 2454 auto *FnTy = 2455 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2456 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin"); 2457 break; 2458 } 2459 case OMPRTL__tgt_target_data_begin_nowait: { 2460 // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 2461 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2462 // *arg_types); 2463 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2464 CGM.Int32Ty, 2465 CGM.VoidPtrPtrTy, 2466 CGM.VoidPtrPtrTy, 2467 CGM.Int64Ty->getPointerTo(), 2468 CGM.Int64Ty->getPointerTo()}; 2469 auto *FnTy = 2470 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2471 RTLFn = 
CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait"); 2472 break; 2473 } 2474 case OMPRTL__tgt_target_data_end: { 2475 // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num, 2476 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2477 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2478 CGM.Int32Ty, 2479 CGM.VoidPtrPtrTy, 2480 CGM.VoidPtrPtrTy, 2481 CGM.Int64Ty->getPointerTo(), 2482 CGM.Int64Ty->getPointerTo()}; 2483 auto *FnTy = 2484 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2485 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end"); 2486 break; 2487 } 2488 case OMPRTL__tgt_target_data_end_nowait: { 2489 // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t 2490 // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t 2491 // *arg_types); 2492 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2493 CGM.Int32Ty, 2494 CGM.VoidPtrPtrTy, 2495 CGM.VoidPtrPtrTy, 2496 CGM.Int64Ty->getPointerTo(), 2497 CGM.Int64Ty->getPointerTo()}; 2498 auto *FnTy = 2499 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2500 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait"); 2501 break; 2502 } 2503 case OMPRTL__tgt_target_data_update: { 2504 // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num, 2505 // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); 2506 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2507 CGM.Int32Ty, 2508 CGM.VoidPtrPtrTy, 2509 CGM.VoidPtrPtrTy, 2510 CGM.Int64Ty->getPointerTo(), 2511 CGM.Int64Ty->getPointerTo()}; 2512 auto *FnTy = 2513 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2514 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update"); 2515 break; 2516 } 2517 case OMPRTL__tgt_target_data_update_nowait: { 2518 // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t 2519 // arg_num, void** args_base, void **args, int64_t *arg_sizes, 
int64_t 2520 // *arg_types); 2521 llvm::Type *TypeParams[] = {CGM.Int64Ty, 2522 CGM.Int32Ty, 2523 CGM.VoidPtrPtrTy, 2524 CGM.VoidPtrPtrTy, 2525 CGM.Int64Ty->getPointerTo(), 2526 CGM.Int64Ty->getPointerTo()}; 2527 auto *FnTy = 2528 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2529 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait"); 2530 break; 2531 } 2532 case OMPRTL__tgt_mapper_num_components: { 2533 // Build int64_t __tgt_mapper_num_components(void *rt_mapper_handle); 2534 llvm::Type *TypeParams[] = {CGM.VoidPtrTy}; 2535 auto *FnTy = 2536 llvm::FunctionType::get(CGM.Int64Ty, TypeParams, /*isVarArg*/ false); 2537 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_mapper_num_components"); 2538 break; 2539 } 2540 case OMPRTL__tgt_push_mapper_component: { 2541 // Build void __tgt_push_mapper_component(void *rt_mapper_handle, void 2542 // *base, void *begin, int64_t size, int64_t type); 2543 llvm::Type *TypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy, CGM.VoidPtrTy, 2544 CGM.Int64Ty, CGM.Int64Ty}; 2545 auto *FnTy = 2546 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2547 RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_push_mapper_component"); 2548 break; 2549 } 2550 } 2551 assert(RTLFn && "Unable to find OpenMP runtime function"); 2552 return RTLFn; 2553 } 2554 2555 llvm::FunctionCallee 2556 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { 2557 assert((IVSize == 32 || IVSize == 64) && 2558 "IV size is not compatible with the omp runtime"); 2559 StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" 2560 : "__kmpc_for_static_init_4u") 2561 : (IVSigned ? "__kmpc_for_static_init_8" 2562 : "__kmpc_for_static_init_8u"); 2563 llvm::Type *ITy = IVSize == 32 ? 
CGM.Int32Ty : CGM.Int64Ty; 2564 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2565 llvm::Type *TypeParams[] = { 2566 getIdentTyPointerTy(), // loc 2567 CGM.Int32Ty, // tid 2568 CGM.Int32Ty, // schedtype 2569 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2570 PtrTy, // p_lower 2571 PtrTy, // p_upper 2572 PtrTy, // p_stride 2573 ITy, // incr 2574 ITy // chunk 2575 }; 2576 auto *FnTy = 2577 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2578 return CGM.CreateRuntimeFunction(FnTy, Name); 2579 } 2580 2581 llvm::FunctionCallee 2582 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { 2583 assert((IVSize == 32 || IVSize == 64) && 2584 "IV size is not compatible with the omp runtime"); 2585 StringRef Name = 2586 IVSize == 32 2587 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") 2588 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); 2589 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2590 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc 2591 CGM.Int32Ty, // tid 2592 CGM.Int32Ty, // schedtype 2593 ITy, // lower 2594 ITy, // upper 2595 ITy, // stride 2596 ITy // chunk 2597 }; 2598 auto *FnTy = 2599 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); 2600 return CGM.CreateRuntimeFunction(FnTy, Name); 2601 } 2602 2603 llvm::FunctionCallee 2604 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { 2605 assert((IVSize == 32 || IVSize == 64) && 2606 "IV size is not compatible with the omp runtime"); 2607 StringRef Name = 2608 IVSize == 32 2609 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") 2610 : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); 2611 llvm::Type *TypeParams[] = { 2612 getIdentTyPointerTy(), // loc 2613 CGM.Int32Ty, // tid 2614 }; 2615 auto *FnTy = 2616 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); 2617 return CGM.CreateRuntimeFunction(FnTy, Name); 2618 } 2619 2620 llvm::FunctionCallee 2621 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { 2622 assert((IVSize == 32 || IVSize == 64) && 2623 "IV size is not compatible with the omp runtime"); 2624 StringRef Name = 2625 IVSize == 32 2626 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") 2627 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); 2628 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; 2629 auto *PtrTy = llvm::PointerType::getUnqual(ITy); 2630 llvm::Type *TypeParams[] = { 2631 getIdentTyPointerTy(), // loc 2632 CGM.Int32Ty, // tid 2633 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter 2634 PtrTy, // p_lower 2635 PtrTy, // p_upper 2636 PtrTy // p_stride 2637 }; 2638 auto *FnTy = 2639 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); 2640 return CGM.CreateRuntimeFunction(FnTy, Name); 2641 } 2642 2643 /// Obtain information that uniquely identifies a target entry. This 2644 /// consists of the file and device IDs as well as line number associated with 2645 /// the relevant entry source location. 
2646 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, 2647 unsigned &DeviceID, unsigned &FileID, 2648 unsigned &LineNum) { 2649 SourceManager &SM = C.getSourceManager(); 2650 2651 // The loc should be always valid and have a file ID (the user cannot use 2652 // #pragma directives in macros) 2653 2654 assert(Loc.isValid() && "Source location is expected to be always valid."); 2655 2656 PresumedLoc PLoc = SM.getPresumedLoc(Loc); 2657 assert(PLoc.isValid() && "Source location is expected to be always valid."); 2658 2659 llvm::sys::fs::UniqueID ID; 2660 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) 2661 SM.getDiagnostics().Report(diag::err_cannot_open_file) 2662 << PLoc.getFilename() << EC.message(); 2663 2664 DeviceID = ID.getDevice(); 2665 FileID = ID.getFile(); 2666 LineNum = PLoc.getLine(); 2667 } 2668 2669 Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { 2670 if (CGM.getLangOpts().OpenMPSimd) 2671 return Address::invalid(); 2672 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2673 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2674 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || 2675 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2676 HasRequiresUnifiedSharedMemory))) { 2677 SmallString<64> PtrName; 2678 { 2679 llvm::raw_svector_ostream OS(PtrName); 2680 OS << CGM.getMangledName(GlobalDecl(VD)); 2681 if (!VD->isExternallyVisible()) { 2682 unsigned DeviceID, FileID, Line; 2683 getTargetEntryUniqueInfo(CGM.getContext(), 2684 VD->getCanonicalDecl()->getBeginLoc(), 2685 DeviceID, FileID, Line); 2686 OS << llvm::format("_%x", FileID); 2687 } 2688 OS << "_decl_tgt_ref_ptr"; 2689 } 2690 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); 2691 if (!Ptr) { 2692 QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); 2693 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), 2694 PtrName); 2695 2696 auto *GV = cast<llvm::GlobalVariable>(Ptr); 
2697 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); 2698 2699 if (!CGM.getLangOpts().OpenMPIsDevice) 2700 GV->setInitializer(CGM.GetAddrOfGlobal(VD)); 2701 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); 2702 } 2703 return Address(Ptr, CGM.getContext().getDeclAlign(VD)); 2704 } 2705 return Address::invalid(); 2706 } 2707 2708 llvm::Constant * 2709 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { 2710 assert(!CGM.getLangOpts().OpenMPUseTLS || 2711 !CGM.getContext().getTargetInfo().isTLSSupported()); 2712 // Lookup the entry, lazily creating it if necessary. 2713 std::string Suffix = getName({"cache", ""}); 2714 return getOrCreateInternalVariable( 2715 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); 2716 } 2717 2718 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 2719 const VarDecl *VD, 2720 Address VDAddr, 2721 SourceLocation Loc) { 2722 if (CGM.getLangOpts().OpenMPUseTLS && 2723 CGM.getContext().getTargetInfo().isTLSSupported()) 2724 return VDAddr; 2725 2726 llvm::Type *VarTy = VDAddr.getElementType(); 2727 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 2728 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), 2729 CGM.Int8PtrTy), 2730 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), 2731 getOrCreateThreadPrivateCache(VD)}; 2732 return Address(CGF.EmitRuntimeCall( 2733 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 2734 VDAddr.getAlignment()); 2735 } 2736 2737 void CGOpenMPRuntime::emitThreadPrivateVarInit( 2738 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, 2739 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { 2740 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime 2741 // library. 
2742 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); 2743 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num), 2744 OMPLoc); 2745 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) 2746 // to register constructor/destructor for variable. 2747 llvm::Value *Args[] = { 2748 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), 2749 Ctor, CopyCtor, Dtor}; 2750 CGF.EmitRuntimeCall( 2751 createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args); 2752 } 2753 2754 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( 2755 const VarDecl *VD, Address VDAddr, SourceLocation Loc, 2756 bool PerformInit, CodeGenFunction *CGF) { 2757 if (CGM.getLangOpts().OpenMPUseTLS && 2758 CGM.getContext().getTargetInfo().isTLSSupported()) 2759 return nullptr; 2760 2761 VD = VD->getDefinition(CGM.getContext()); 2762 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { 2763 QualType ASTTy = VD->getType(); 2764 2765 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; 2766 const Expr *Init = VD->getAnyInitializer(); 2767 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2768 // Generate function that re-emits the declaration's initializer into the 2769 // threadprivate copy of the variable VD 2770 CodeGenFunction CtorCGF(CGM); 2771 FunctionArgList Args; 2772 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2773 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2774 ImplicitParamDecl::Other); 2775 Args.push_back(&Dst); 2776 2777 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2778 CGM.getContext().VoidPtrTy, Args); 2779 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2780 std::string Name = getName({"__kmpc_global_ctor_", ""}); 2781 llvm::Function *Fn = 2782 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2783 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, 2784 Args, Loc, Loc); 2785 
llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( 2786 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2787 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2788 Address Arg = Address(ArgVal, VDAddr.getAlignment()); 2789 Arg = CtorCGF.Builder.CreateElementBitCast( 2790 Arg, CtorCGF.ConvertTypeForMem(ASTTy)); 2791 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), 2792 /*IsInitializer=*/true); 2793 ArgVal = CtorCGF.EmitLoadOfScalar( 2794 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, 2795 CGM.getContext().VoidPtrTy, Dst.getLocation()); 2796 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); 2797 CtorCGF.FinishFunction(); 2798 Ctor = Fn; 2799 } 2800 if (VD->getType().isDestructedType() != QualType::DK_none) { 2801 // Generate function that emits destructor call for the threadprivate copy 2802 // of the variable VD 2803 CodeGenFunction DtorCGF(CGM); 2804 FunctionArgList Args; 2805 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, 2806 /*Id=*/nullptr, CGM.getContext().VoidPtrTy, 2807 ImplicitParamDecl::Other); 2808 Args.push_back(&Dst); 2809 2810 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( 2811 CGM.getContext().VoidTy, Args); 2812 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2813 std::string Name = getName({"__kmpc_global_dtor_", ""}); 2814 llvm::Function *Fn = 2815 CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc); 2816 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2817 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, 2818 Loc, Loc); 2819 // Create a scope with an artificial location for the body of this function. 
2820 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2821 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( 2822 DtorCGF.GetAddrOfLocalVar(&Dst), 2823 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); 2824 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, 2825 DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2826 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2827 DtorCGF.FinishFunction(); 2828 Dtor = Fn; 2829 } 2830 // Do not emit init function if it is not required. 2831 if (!Ctor && !Dtor) 2832 return nullptr; 2833 2834 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; 2835 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, 2836 /*isVarArg=*/false) 2837 ->getPointerTo(); 2838 // Copying constructor for the threadprivate variable. 2839 // Must be NULL - reserved by runtime, but currently it requires that this 2840 // parameter is always NULL. Otherwise it fires assertion. 2841 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); 2842 if (Ctor == nullptr) { 2843 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, 2844 /*isVarArg=*/false) 2845 ->getPointerTo(); 2846 Ctor = llvm::Constant::getNullValue(CtorTy); 2847 } 2848 if (Dtor == nullptr) { 2849 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, 2850 /*isVarArg=*/false) 2851 ->getPointerTo(); 2852 Dtor = llvm::Constant::getNullValue(DtorTy); 2853 } 2854 if (!CGF) { 2855 auto *InitFunctionTy = 2856 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); 2857 std::string Name = getName({"__omp_threadprivate_init_", ""}); 2858 llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction( 2859 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); 2860 CodeGenFunction InitCGF(CGM); 2861 FunctionArgList ArgList; 2862 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, 2863 CGM.getTypes().arrangeNullaryFunction(), ArgList, 2864 Loc, Loc); 2865 
emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2866 InitCGF.FinishFunction(); 2867 return InitFunction; 2868 } 2869 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); 2870 } 2871 return nullptr; 2872 } 2873 2874 bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, 2875 llvm::GlobalVariable *Addr, 2876 bool PerformInit) { 2877 if (CGM.getLangOpts().OMPTargetTriples.empty() && 2878 !CGM.getLangOpts().OpenMPIsDevice) 2879 return false; 2880 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 2881 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 2882 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 2883 (*Res == OMPDeclareTargetDeclAttr::MT_To && 2884 HasRequiresUnifiedSharedMemory)) 2885 return CGM.getLangOpts().OpenMPIsDevice; 2886 VD = VD->getDefinition(CGM.getContext()); 2887 if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) 2888 return CGM.getLangOpts().OpenMPIsDevice; 2889 2890 QualType ASTTy = VD->getType(); 2891 2892 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); 2893 // Produce the unique prefix to identify the new target regions. We use 2894 // the source location of the variable declaration which we know to not 2895 // conflict with any target region. 
2896 unsigned DeviceID; 2897 unsigned FileID; 2898 unsigned Line; 2899 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); 2900 SmallString<128> Buffer, Out; 2901 { 2902 llvm::raw_svector_ostream OS(Buffer); 2903 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) 2904 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 2905 } 2906 2907 const Expr *Init = VD->getAnyInitializer(); 2908 if (CGM.getLangOpts().CPlusPlus && PerformInit) { 2909 llvm::Constant *Ctor; 2910 llvm::Constant *ID; 2911 if (CGM.getLangOpts().OpenMPIsDevice) { 2912 // Generate function that re-emits the declaration's initializer into 2913 // the threadprivate copy of the variable VD 2914 CodeGenFunction CtorCGF(CGM); 2915 2916 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2917 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2918 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2919 FTy, Twine(Buffer, "_ctor"), FI, Loc); 2920 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); 2921 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2922 FunctionArgList(), Loc, Loc); 2923 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); 2924 CtorCGF.EmitAnyExprToMem(Init, 2925 Address(Addr, CGM.getContext().getDeclAlign(VD)), 2926 Init->getType().getQualifiers(), 2927 /*IsInitializer=*/true); 2928 CtorCGF.FinishFunction(); 2929 Ctor = Fn; 2930 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2931 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); 2932 } else { 2933 Ctor = new llvm::GlobalVariable( 2934 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2935 llvm::GlobalValue::PrivateLinkage, 2936 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); 2937 ID = Ctor; 2938 } 2939 2940 // Register the information for the entry associated with the constructor. 
2941 Out.clear(); 2942 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2943 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, 2944 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); 2945 } 2946 if (VD->getType().isDestructedType() != QualType::DK_none) { 2947 llvm::Constant *Dtor; 2948 llvm::Constant *ID; 2949 if (CGM.getLangOpts().OpenMPIsDevice) { 2950 // Generate function that emits destructor call for the threadprivate 2951 // copy of the variable VD 2952 CodeGenFunction DtorCGF(CGM); 2953 2954 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); 2955 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 2956 llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction( 2957 FTy, Twine(Buffer, "_dtor"), FI, Loc); 2958 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); 2959 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, 2960 FunctionArgList(), Loc, Loc); 2961 // Create a scope with an artificial location for the body of this 2962 // function. 2963 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); 2964 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), 2965 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), 2966 DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); 2967 DtorCGF.FinishFunction(); 2968 Dtor = Fn; 2969 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); 2970 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); 2971 } else { 2972 Dtor = new llvm::GlobalVariable( 2973 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, 2974 llvm::GlobalValue::PrivateLinkage, 2975 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); 2976 ID = Dtor; 2977 } 2978 // Register the information for the entry associated with the destructor. 
2979 Out.clear(); 2980 OffloadEntriesInfoManager.registerTargetRegionEntryInfo( 2981 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, 2982 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); 2983 } 2984 return CGM.getLangOpts().OpenMPIsDevice; 2985 } 2986 2987 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, 2988 QualType VarType, 2989 StringRef Name) { 2990 std::string Suffix = getName({"artificial", ""}); 2991 std::string CacheSuffix = getName({"cache", ""}); 2992 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); 2993 llvm::Value *GAddr = 2994 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); 2995 llvm::Value *Args[] = { 2996 emitUpdateLocation(CGF, SourceLocation()), 2997 getThreadID(CGF, SourceLocation()), 2998 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), 2999 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, 3000 /*isSigned=*/false), 3001 getOrCreateInternalVariable( 3002 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; 3003 return Address( 3004 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3005 CGF.EmitRuntimeCall( 3006 createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), 3007 VarLVType->getPointerTo(/*AddrSpace=*/0)), 3008 CGM.getPointerAlign()); 3009 } 3010 3011 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, 3012 const RegionCodeGenTy &ThenGen, 3013 const RegionCodeGenTy &ElseGen) { 3014 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); 3015 3016 // If the condition constant folds and can be elided, try to avoid emitting 3017 // the condition and the dead arm of the if/else. 3018 bool CondConstant; 3019 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { 3020 if (CondConstant) 3021 ThenGen(CGF); 3022 else 3023 ElseGen(CGF); 3024 return; 3025 } 3026 3027 // Otherwise, the condition did not fold, or we couldn't elide it. 
Just 3028 // emit the conditional branch. 3029 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3030 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); 3031 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); 3032 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); 3033 3034 // Emit the 'then' code. 3035 CGF.EmitBlock(ThenBlock); 3036 ThenGen(CGF); 3037 CGF.EmitBranch(ContBlock); 3038 // Emit the 'else' code if present. 3039 // There is no need to emit line number for unconditional branch. 3040 (void)ApplyDebugLocation::CreateEmpty(CGF); 3041 CGF.EmitBlock(ElseBlock); 3042 ElseGen(CGF); 3043 // There is no need to emit line number for unconditional branch. 3044 (void)ApplyDebugLocation::CreateEmpty(CGF); 3045 CGF.EmitBranch(ContBlock); 3046 // Emit the continuation block for code after the if. 3047 CGF.EmitBlock(ContBlock, /*IsFinished=*/true); 3048 } 3049 3050 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, 3051 llvm::Function *OutlinedFn, 3052 ArrayRef<llvm::Value *> CapturedVars, 3053 const Expr *IfCond) { 3054 if (!CGF.HaveInsertPoint()) 3055 return; 3056 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 3057 auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF, 3058 PrePostActionTy &) { 3059 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); 3060 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3061 llvm::Value *Args[] = { 3062 RTLoc, 3063 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 3064 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; 3065 llvm::SmallVector<llvm::Value *, 16> RealArgs; 3066 RealArgs.append(std::begin(Args), std::end(Args)); 3067 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 3068 3069 llvm::FunctionCallee RTLFn = 3070 RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); 3071 CGF.EmitRuntimeCall(RTLFn, RealArgs); 3072 }; 3073 auto &&ElseGen = [OutlinedFn, 
CapturedVars, RTLoc, Loc](CodeGenFunction &CGF, 3074 PrePostActionTy &) { 3075 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 3076 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); 3077 // Build calls: 3078 // __kmpc_serialized_parallel(&Loc, GTid); 3079 llvm::Value *Args[] = {RTLoc, ThreadID}; 3080 CGF.EmitRuntimeCall( 3081 RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); 3082 3083 // OutlinedFn(>id, &zero_bound, CapturedStruct); 3084 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); 3085 Address ZeroAddrBound = 3086 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, 3087 /*Name=*/".bound.zero.addr"); 3088 CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); 3089 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; 3090 // ThreadId for serialized parallels is 0. 3091 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); 3092 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); 3093 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); 3094 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); 3095 3096 // __kmpc_end_serialized_parallel(&Loc, GTid); 3097 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; 3098 CGF.EmitRuntimeCall( 3099 RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), 3100 EndArgs); 3101 }; 3102 if (IfCond) { 3103 emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); 3104 } else { 3105 RegionCodeGenTy ThenRCG(ThenGen); 3106 ThenRCG(CGF); 3107 } 3108 } 3109 3110 // If we're inside an (outlined) parallel region, use the region info's 3111 // thread-ID variable (it is passed in a first argument of the outlined function 3112 // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in 3113 // regular serial code region, get thread ID by calling kmp_int32 3114 // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and 3115 // return the address of that temp. 
3116 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, 3117 SourceLocation Loc) { 3118 if (auto *OMPRegionInfo = 3119 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3120 if (OMPRegionInfo->getThreadIDVariable()) 3121 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); 3122 3123 llvm::Value *ThreadID = getThreadID(CGF, Loc); 3124 QualType Int32Ty = 3125 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); 3126 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); 3127 CGF.EmitStoreOfScalar(ThreadID, 3128 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); 3129 3130 return ThreadIDTemp; 3131 } 3132 3133 llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( 3134 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { 3135 SmallString<256> Buffer; 3136 llvm::raw_svector_ostream Out(Buffer); 3137 Out << Name; 3138 StringRef RuntimeName = Out.str(); 3139 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; 3140 if (Elem.second) { 3141 assert(Elem.second->getType()->getPointerElementType() == Ty && 3142 "OMP internal variable has different type than requested"); 3143 return &*Elem.second; 3144 } 3145 3146 return Elem.second = new llvm::GlobalVariable( 3147 CGM.getModule(), Ty, /*IsConstant*/ false, 3148 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), 3149 Elem.first(), /*InsertBefore=*/nullptr, 3150 llvm::GlobalValue::NotThreadLocal, AddressSpace); 3151 } 3152 3153 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { 3154 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); 3155 std::string Name = getName({Prefix, "var"}); 3156 return getOrCreateInternalVariable(KmpCriticalNameTy, Name); 3157 } 3158 3159 namespace { 3160 /// Common pre(post)-action for different OpenMP constructs. 
3161 class CommonActionTy final : public PrePostActionTy { 3162 llvm::FunctionCallee EnterCallee; 3163 ArrayRef<llvm::Value *> EnterArgs; 3164 llvm::FunctionCallee ExitCallee; 3165 ArrayRef<llvm::Value *> ExitArgs; 3166 bool Conditional; 3167 llvm::BasicBlock *ContBlock = nullptr; 3168 3169 public: 3170 CommonActionTy(llvm::FunctionCallee EnterCallee, 3171 ArrayRef<llvm::Value *> EnterArgs, 3172 llvm::FunctionCallee ExitCallee, 3173 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) 3174 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), 3175 ExitArgs(ExitArgs), Conditional(Conditional) {} 3176 void Enter(CodeGenFunction &CGF) override { 3177 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); 3178 if (Conditional) { 3179 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); 3180 auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); 3181 ContBlock = CGF.createBasicBlock("omp_if.end"); 3182 // Generate the branch (If-stmt) 3183 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); 3184 CGF.EmitBlock(ThenBlock); 3185 } 3186 } 3187 void Done(CodeGenFunction &CGF) { 3188 // Emit the rest of blocks/branches 3189 CGF.EmitBranch(ContBlock); 3190 CGF.EmitBlock(ContBlock, true); 3191 } 3192 void Exit(CodeGenFunction &CGF) override { 3193 CGF.EmitRuntimeCall(ExitCallee, ExitArgs); 3194 } 3195 }; 3196 } // anonymous namespace 3197 3198 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, 3199 StringRef CriticalName, 3200 const RegionCodeGenTy &CriticalOpGen, 3201 SourceLocation Loc, const Expr *Hint) { 3202 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); 3203 // CriticalOpGen(); 3204 // __kmpc_end_critical(ident_t *, gtid, Lock); 3205 // Prepare arguments and build a call to __kmpc_critical 3206 if (!CGF.HaveInsertPoint()) 3207 return; 3208 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3209 getCriticalRegionLock(CriticalName)}; 3210 
llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), 3211 std::end(Args)); 3212 if (Hint) { 3213 EnterArgs.push_back(CGF.Builder.CreateIntCast( 3214 CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); 3215 } 3216 CommonActionTy Action( 3217 createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint 3218 : OMPRTL__kmpc_critical), 3219 EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); 3220 CriticalOpGen.setAction(Action); 3221 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); 3222 } 3223 3224 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, 3225 const RegionCodeGenTy &MasterOpGen, 3226 SourceLocation Loc) { 3227 if (!CGF.HaveInsertPoint()) 3228 return; 3229 // if(__kmpc_master(ident_t *, gtid)) { 3230 // MasterOpGen(); 3231 // __kmpc_end_master(ident_t *, gtid); 3232 // } 3233 // Prepare arguments and build a call to __kmpc_master 3234 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3235 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, 3236 createRuntimeFunction(OMPRTL__kmpc_end_master), Args, 3237 /*Conditional=*/true); 3238 MasterOpGen.setAction(Action); 3239 emitInlinedDirective(CGF, OMPD_master, MasterOpGen); 3240 Action.Done(CGF); 3241 } 3242 3243 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 3244 SourceLocation Loc) { 3245 if (!CGF.HaveInsertPoint()) 3246 return; 3247 // Build call __kmpc_omp_taskyield(loc, thread_id, 0); 3248 llvm::Value *Args[] = { 3249 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), 3250 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; 3251 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); 3252 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 3253 Region->emitUntiedSwitch(CGF); 3254 } 3255 3256 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, 3257 const RegionCodeGenTy &TaskgroupOpGen, 3258 SourceLocation Loc) { 3259 if 
(!CGF.HaveInsertPoint()) 3260 return; 3261 // __kmpc_taskgroup(ident_t *, gtid); 3262 // TaskgroupOpGen(); 3263 // __kmpc_end_taskgroup(ident_t *, gtid); 3264 // Prepare arguments and build a call to __kmpc_taskgroup 3265 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3266 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, 3267 createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), 3268 Args); 3269 TaskgroupOpGen.setAction(Action); 3270 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); 3271 } 3272 3273 /// Given an array of pointers to variables, project the address of a 3274 /// given variable. 3275 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, 3276 unsigned Index, const VarDecl *Var) { 3277 // Pull out the pointer to the variable. 3278 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); 3279 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); 3280 3281 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); 3282 Addr = CGF.Builder.CreateElementBitCast( 3283 Addr, CGF.ConvertTypeForMem(Var->getType())); 3284 return Addr; 3285 } 3286 3287 static llvm::Value *emitCopyprivateCopyFunction( 3288 CodeGenModule &CGM, llvm::Type *ArgsType, 3289 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, 3290 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, 3291 SourceLocation Loc) { 3292 ASTContext &C = CGM.getContext(); 3293 // void copy_func(void *LHSArg, void *RHSArg); 3294 FunctionArgList Args; 3295 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3296 ImplicitParamDecl::Other); 3297 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 3298 ImplicitParamDecl::Other); 3299 Args.push_back(&LHSArg); 3300 Args.push_back(&RHSArg); 3301 const auto &CGFI = 3302 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 3303 std::string Name = 3304 
CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); 3305 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), 3306 llvm::GlobalValue::InternalLinkage, Name, 3307 &CGM.getModule()); 3308 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); 3309 Fn->setDoesNotRecurse(); 3310 CodeGenFunction CGF(CGM); 3311 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); 3312 // Dest = (void*[n])(LHSArg); 3313 // Src = (void*[n])(RHSArg); 3314 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3315 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), 3316 ArgsType), CGF.getPointerAlign()); 3317 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3318 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), 3319 ArgsType), CGF.getPointerAlign()); 3320 // *(Type0*)Dst[0] = *(Type0*)Src[0]; 3321 // *(Type1*)Dst[1] = *(Type1*)Src[1]; 3322 // ... 3323 // *(Typen*)Dst[n] = *(Typen*)Src[n]; 3324 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { 3325 const auto *DestVar = 3326 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); 3327 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); 3328 3329 const auto *SrcVar = 3330 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); 3331 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); 3332 3333 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); 3334 QualType Type = VD->getType(); 3335 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); 3336 } 3337 CGF.FinishFunction(); 3338 return Fn; 3339 } 3340 3341 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, 3342 const RegionCodeGenTy &SingleOpGen, 3343 SourceLocation Loc, 3344 ArrayRef<const Expr *> CopyprivateVars, 3345 ArrayRef<const Expr *> SrcExprs, 3346 ArrayRef<const Expr *> DstExprs, 3347 ArrayRef<const Expr *> AssignmentOps) { 3348 if (!CGF.HaveInsertPoint()) 3349 return; 3350 assert(CopyprivateVars.size() == 
SrcExprs.size() && 3351 CopyprivateVars.size() == DstExprs.size() && 3352 CopyprivateVars.size() == AssignmentOps.size()); 3353 ASTContext &C = CGM.getContext(); 3354 // int32 did_it = 0; 3355 // if(__kmpc_single(ident_t *, gtid)) { 3356 // SingleOpGen(); 3357 // __kmpc_end_single(ident_t *, gtid); 3358 // did_it = 1; 3359 // } 3360 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3361 // <copy_func>, did_it); 3362 3363 Address DidIt = Address::invalid(); 3364 if (!CopyprivateVars.empty()) { 3365 // int32 did_it = 0; 3366 QualType KmpInt32Ty = 3367 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 3368 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); 3369 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); 3370 } 3371 // Prepare arguments and build a call to __kmpc_single 3372 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3373 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, 3374 createRuntimeFunction(OMPRTL__kmpc_end_single), Args, 3375 /*Conditional=*/true); 3376 SingleOpGen.setAction(Action); 3377 emitInlinedDirective(CGF, OMPD_single, SingleOpGen); 3378 if (DidIt.isValid()) { 3379 // did_it = 1; 3380 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); 3381 } 3382 Action.Done(CGF); 3383 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, 3384 // <copy_func>, did_it); 3385 if (DidIt.isValid()) { 3386 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); 3387 QualType CopyprivateArrayTy = C.getConstantArrayType( 3388 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, 3389 /*IndexTypeQuals=*/0); 3390 // Create a list of all private variables for copyprivate. 
3391 Address CopyprivateList = 3392 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); 3393 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { 3394 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); 3395 CGF.Builder.CreateStore( 3396 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 3397 CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy), 3398 Elem); 3399 } 3400 // Build function that copies private values from single region to all other 3401 // threads in the corresponding parallel region. 3402 llvm::Value *CpyFn = emitCopyprivateCopyFunction( 3403 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), 3404 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); 3405 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); 3406 Address CL = 3407 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, 3408 CGF.VoidPtrTy); 3409 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); 3410 llvm::Value *Args[] = { 3411 emitUpdateLocation(CGF, Loc), // ident_t *<loc> 3412 getThreadID(CGF, Loc), // i32 <gtid> 3413 BufSize, // size_t <buf_size> 3414 CL.getPointer(), // void *<copyprivate list> 3415 CpyFn, // void (*) (void *, void *) <copy_func> 3416 DidItVal // i32 did_it 3417 }; 3418 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); 3419 } 3420 } 3421 3422 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, 3423 const RegionCodeGenTy &OrderedOpGen, 3424 SourceLocation Loc, bool IsThreads) { 3425 if (!CGF.HaveInsertPoint()) 3426 return; 3427 // __kmpc_ordered(ident_t *, gtid); 3428 // OrderedOpGen(); 3429 // __kmpc_end_ordered(ident_t *, gtid); 3430 // Prepare arguments and build a call to __kmpc_ordered 3431 if (IsThreads) { 3432 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; 3433 CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, 3434 createRuntimeFunction(OMPRTL__kmpc_end_ordered), 3435 Args); 3436 
OrderedOpGen.setAction(Action); 3437 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3438 return; 3439 } 3440 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); 3441 } 3442 3443 unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { 3444 unsigned Flags; 3445 if (Kind == OMPD_for) 3446 Flags = OMP_IDENT_BARRIER_IMPL_FOR; 3447 else if (Kind == OMPD_sections) 3448 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; 3449 else if (Kind == OMPD_single) 3450 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; 3451 else if (Kind == OMPD_barrier) 3452 Flags = OMP_IDENT_BARRIER_EXPL; 3453 else 3454 Flags = OMP_IDENT_BARRIER_IMPL; 3455 return Flags; 3456 } 3457 3458 void CGOpenMPRuntime::getDefaultScheduleAndChunk( 3459 CodeGenFunction &CGF, const OMPLoopDirective &S, 3460 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { 3461 // Check if the loop directive is actually a doacross loop directive. In this 3462 // case choose static, 1 schedule. 3463 if (llvm::any_of( 3464 S.getClausesOfKind<OMPOrderedClause>(), 3465 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { 3466 ScheduleKind = OMPC_SCHEDULE_static; 3467 // Chunk size is 1 in this case. 
3468 llvm::APInt ChunkSize(32, 1); 3469 ChunkExpr = IntegerLiteral::Create( 3470 CGF.getContext(), ChunkSize, 3471 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), 3472 SourceLocation()); 3473 } 3474 } 3475 3476 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, 3477 OpenMPDirectiveKind Kind, bool EmitChecks, 3478 bool ForceSimpleCall) { 3479 if (!CGF.HaveInsertPoint()) 3480 return; 3481 // Build call __kmpc_cancel_barrier(loc, thread_id); 3482 // Build call __kmpc_barrier(loc, thread_id); 3483 unsigned Flags = getDefaultFlagsForBarriers(Kind); 3484 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, 3485 // thread_id); 3486 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), 3487 getThreadID(CGF, Loc)}; 3488 if (auto *OMPRegionInfo = 3489 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { 3490 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { 3491 llvm::Value *Result = CGF.EmitRuntimeCall( 3492 createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args); 3493 if (EmitChecks) { 3494 // if (__kmpc_cancel_barrier()) { 3495 // exit from construct; 3496 // } 3497 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); 3498 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); 3499 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); 3500 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); 3501 CGF.EmitBlock(ExitBB); 3502 // exit from construct; 3503 CodeGenFunction::JumpDest CancelDestination = 3504 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); 3505 CGF.EmitBranchThroughCleanup(CancelDestination); 3506 CGF.EmitBlock(ContBB, /*IsFinished=*/true); 3507 } 3508 return; 3509 } 3510 } 3511 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args); 3512 } 3513 3514 /// Map the OpenMP loop schedule to the runtime enumeration. 
/// For the static kind, \p Chunked selects the *_static_chunked value; for
/// every kind, \p Ordered selects the OMP_ord_* value over the OMP_sch_* one.
/// An unknown kind maps to the non-chunked static schedule.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
/// \p ScheduleKind is not inspected; only \p Chunked affects the result.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

/// Return true if the (unordered) loop schedule maps to OMP_sch_static.
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

/// Return true if the distribute schedule maps to OMP_dist_sch_static.
bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

/// Return true if the (unordered) loop schedule maps to
/// OMP_sch_static_chunked.
bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

/// Return true if the distribute schedule maps to
/// OMP_dist_sch_static_chunked.
bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

/// Return true if \p ScheduleKind, queried as unchunked and unordered, maps
/// to anything other than OMP_sch_static.
bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

/// Combine \p Schedule with the schedule modifiers \p M1 and \p M2 into the
/// integer schedule value passed to the runtime.
///
/// A monotonic/nonmonotonic modifier sets the corresponding modifier bits
/// (\p M2 is processed after \p M1, so it wins if both set one). A simd
/// modifier turns OMP_sch_static_chunked into OMP_sch_static_balanced_chunked.
/// \return \p Schedule (possibly adjusted) or'ed with the modifier bits.
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  // First modifier.
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // Second modifier, handled exactly like the first.
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    // Only the non-static schedules default to nonmonotonic here; the static
    // ones are left without modifier bits.
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

/// Emit the dispatch-init runtime call for a loop whose iteration variable is
/// \p IVSize bits wide and signed according to \p IVSigned.
///
/// The schedule is derived from \p ScheduleKind, the presence of a chunk in
/// \p DispatchValues and \p Ordered. Unless \p Ordered is set, the resulting
/// schedule must not be one of the static schedules (asserted).
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

/// Shared helper that emits the call to \p ForStaticInitFunction for both the
/// worksharing-loop and the distribute static-init paths.
///
/// \p Values must not be ordered, and \p Schedule must be one of the static
/// schedules (both asserted). A missing chunk in \p Values is only accepted
/// for the non-chunked static schedules and is passed to the runtime as 1.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

/// Emit the static-init call for the worksharing directive \p DKind.
///
/// The location is tagged OMP_IDENT_WORK_LOOP for loop directives and
/// OMP_IDENT_WORK_SECTIONS otherwise.
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                             isOpenMPLoopDirective(DKind)
                                                 ? OMP_IDENT_WORK_LOOP
                                                 : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

/// Emit the static-init call for a 'distribute' loop. The location is tagged
/// OMP_IDENT_WORK_DISTRIBUTE and no schedule modifiers are passed.
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

/// Emit the call that finishes a statically scheduled construct. The location
/// flag is chosen from \p DKind: distribute, then loop, otherwise sections.
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
                      Args);
}

/// Emit the call marking the end of an iteration of an ordered loop, using
/// the dispatch-fini function matching \p IVSize and \p IVSigned.
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  // NOTE(review): the callee is whatever createDispatchFiniFunction returns;
  // confirm that the runtime symbol name quoted above matches it.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

/// Emit the dispatch-next call that writes through \p IL, \p LB, \p UB and
/// \p ST.
/// \return the call's result converted from a signed 32-bit integer to bool.
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

/// Emit the runtime call for a 'num_threads' clause; \p NumThreads is cast to
/// a signed 32-bit integer first.
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
                      Args);
}

/// Emit the runtime call for a 'proc_bind' clause, translating \p ProcBind
/// into the numeric value passed to the runtime. OMPC_PROC_BIND_unknown is
/// treated as unreachable.
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         OpenMPProcBindClauseKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Constants for proc bind value accepted by the runtime.
  enum ProcBindTy {
    ProcBindFalse = 0,
    ProcBindTrue,
    ProcBindMaster,
    ProcBindClose,
    ProcBindSpread,
    ProcBindIntel,
    ProcBindDefault
  } RuntimeProcBind;
  switch (ProcBind) {
  case OMPC_PROC_BIND_master:
    RuntimeProcBind = ProcBindMaster;
    break;
  case OMPC_PROC_BIND_close:
    RuntimeProcBind = ProcBindClose;
    break;
  case OMPC_PROC_BIND_spread:
    RuntimeProcBind = ProcBindSpread;
    break;
  case OMPC_PROC_BIND_unknown:
    llvm_unreachable("Unsupported proc_bind value.");
  }
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
}

/// Emit the runtime call for a 'flush' directive. The list of flushed
/// expressions (the unnamed second parameter) is not used.
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call void __kmpc_flush(ident_t *loc)
  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
                      emitUpdateLocation(CGF, Loc));
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

/// Return true if neither target region entries nor device global variable
/// entries have been recorded.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
  return OffloadEntriesTargetRegion.empty() &&
         OffloadEntriesDeviceGlobalVar.empty();
}

/// Initialize target region entry.
/// The entry is stored with order \p Order and null address and ID, and the
/// entry counter is incremented. Only valid in device compilation (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                    StringRef ParentName, unsigned LineNum,
                                    unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
      OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
                                   OMPTargetRegionEntryTargetRegion);
  ++OffloadingEntriesNum;
}

/// Register the address, ID and flags of a target region entry.
///
/// In device compilation the entry must already have been initialized and not
/// yet registered; otherwise an error diagnostic is reported and nothing is
/// stored. In host compilation a new entry is created and given the next
/// entry number.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
                                  StringRef ParentName, unsigned LineNum,
                                  llvm::Constant *Addr, llvm::Constant *ID,
                                  OMPTargetRegionEntryKind Flags) {
  // If we are emitting code for a target, the entry is already initialized,
  // only has to be registered.
  if (CGM.getLangOpts().OpenMPIsDevice) {
    if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Unable to find target region on line '%0' in the device code.");
      CGM.getDiags().Report(DiagID) << LineNum;
      return;
    }
    auto &Entry =
        OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
    assert(Entry.isValid() && "Entry not initialized!");
    Entry.setAddress(Addr);
    Entry.setID(ID);
    Entry.setFlags(Flags);
  } else {
    OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
    OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
    ++OffloadingEntriesNum;
  }
}

/// Return true if an entry exists for the given device, file, parent name and
/// line and it has neither an address nor an ID yet. Note that an entry that
/// is already registered therefore yields false.
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
    unsigned DeviceID, unsigned FileID, StringRef ParentName,
    unsigned LineNum) const {
  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
  if (PerDevice == OffloadEntriesTargetRegion.end())
    return false;
  auto PerFile = PerDevice->second.find(FileID);
  if (PerFile == PerDevice->second.end())
    return false;
  auto PerParentName = PerFile->second.find(ParentName);
  if (PerParentName == PerFile->second.end())
    return false;
  auto PerLine = PerParentName->second.find(LineNum);
  if (PerLine == PerParentName->second.end())
    return false;
  // Fail if this entry is already registered.
  if (PerLine->second.getAddress() || PerLine->second.getID())
    return false;
  return true;
}

/// Invoke \p Action on every recorded target region entry, passing its device
/// ID, file ID, parent name, line number and entry info.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
    const OffloadTargetRegionEntryInfoActTy &Action) {
  // Scan all target region entries and perform the provided action.
  for (const auto &D : OffloadEntriesTargetRegion)
    for (const auto &F : D.second)
      for (const auto &P : F.second)
        for (const auto &L : P.second)
          Action(D.first, F.first, P.first(), L.first, L.second);
}

/// Initialize the device global variable entry \p Name with \p Order and
/// \p Flags and increment the entry counter. Only valid in device compilation
/// (asserted).
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    initializeDeviceGlobalVarEntryInfo(StringRef Name,
                                       OMPTargetGlobalVarEntryKind Flags,
                                       unsigned Order) {
  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
                                             "only required for the device "
                                             "code generation.");
  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
  ++OffloadingEntriesNum;
}

/// Register address, size and linkage for the device global variable
/// \p VarName.
///
/// In device compilation the entry is expected to have been initialized with
/// the same \p Flags (asserted). In host compilation a new entry is created
/// when none exists. In both modes, an existing entry only has its size and
/// linkage filled in if its recorded size is still zero.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
                                     CharUnits VarSize,
                                     OMPTargetGlobalVarEntryKind Flags,
                                     llvm::GlobalValue::LinkageTypes Linkage) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
    assert(Entry.isValid() && Entry.getFlags() == Flags &&
           "Entry not initialized!");
    assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
           "Resetting with the new address.");
    // Address already known: at most complete the size/linkage.
    if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    Entry.setVarSize(VarSize);
    Entry.setLinkage(Linkage);
    Entry.setAddress(Addr);
  } else {
    if (hasDeviceGlobalVarEntryInfo(VarName)) {
      auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
      assert(Entry.isValid() && Entry.getFlags() == Flags &&
             "Entry not initialized!");
      assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
             "Resetting with the new address.");
      if (Entry.getVarSize().isZero()) {
        Entry.setVarSize(VarSize);
        Entry.setLinkage(Linkage);
      }
      return;
    }
    // First registration on the host: create the entry with the next number.
    OffloadEntriesDeviceGlobalVar.try_emplace(
        VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
    ++OffloadingEntriesNum;
  }
}

/// Invoke \p Action on every recorded device global variable entry, passing
/// its name and entry info.
void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
    actOnDeviceGlobalVarEntriesInfo(
        const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
  // Scan all device global variable entries and perform the provided action.
  for (const auto &E : OffloadEntriesDeviceGlobalVar)
    Action(E.getKey(), E.getValue());
}

void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  StringRef Name = Addr->getName();
  llvm::Module &M = CGM.getModule();
  llvm::LLVMContext &C = M.getContext();

  // Create constant string with the name.
  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);

  std::string StringName = getName({"omp_offloading", "entry_name"});
  auto *Str = new llvm::GlobalVariable(
      M, StrPtrInit->getType(), /*isConstant=*/true,
      llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
                            llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
                            llvm::ConstantInt::get(CGM.SizeTy, Size),
                            llvm::ConstantInt::get(CGM.Int32Ty, Flags),
                            llvm::ConstantInt::get(CGM.Int32Ty, 0)};
  std::string EntryName = getName({"omp_offloading", "entry", ""});
  llvm::GlobalVariable *Entry = createGlobalStruct(
      CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
      Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);

  // The entry has to be created in the section the linker expects it to be.
4043 Entry->setSection("omp_offloading_entries"); 4044 } 4045 4046 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { 4047 // Emit the offloading entries and metadata so that the device codegen side 4048 // can easily figure out what to emit. The produced metadata looks like 4049 // this: 4050 // 4051 // !omp_offload.info = !{!1, ...} 4052 // 4053 // Right now we only generate metadata for function that contain target 4054 // regions. 4055 4056 // If we are in simd mode or there are no entries, we don't need to do 4057 // anything. 4058 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) 4059 return; 4060 4061 llvm::Module &M = CGM.getModule(); 4062 llvm::LLVMContext &C = M.getContext(); 4063 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, 4064 SourceLocation, StringRef>, 4065 16> 4066 OrderedEntries(OffloadEntriesInfoManager.size()); 4067 llvm::SmallVector<StringRef, 16> ParentFunctions( 4068 OffloadEntriesInfoManager.size()); 4069 4070 // Auxiliary methods to create metadata values and strings. 4071 auto &&GetMDInt = [this](unsigned V) { 4072 return llvm::ConstantAsMetadata::get( 4073 llvm::ConstantInt::get(CGM.Int32Ty, V)); 4074 }; 4075 4076 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; 4077 4078 // Create the offloading info metadata node. 4079 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); 4080 4081 // Create function that emits metadata for each target region entry; 4082 auto &&TargetRegionMetadataEmitter = 4083 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, 4084 &GetMDString]( 4085 unsigned DeviceID, unsigned FileID, StringRef ParentName, 4086 unsigned Line, 4087 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { 4088 // Generate metadata for target regions. Each entry of this metadata 4089 // contains: 4090 // - Entry 0 -> Kind of this type of metadata (0). 
4091 // - Entry 1 -> Device ID of the file where the entry was identified. 4092 // - Entry 2 -> File ID of the file where the entry was identified. 4093 // - Entry 3 -> Mangled name of the function where the entry was 4094 // identified. 4095 // - Entry 4 -> Line in the file where the entry was identified. 4096 // - Entry 5 -> Order the entry was created. 4097 // The first element of the metadata node is the kind. 4098 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), 4099 GetMDInt(FileID), GetMDString(ParentName), 4100 GetMDInt(Line), GetMDInt(E.getOrder())}; 4101 4102 SourceLocation Loc; 4103 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), 4104 E = CGM.getContext().getSourceManager().fileinfo_end(); 4105 I != E; ++I) { 4106 if (I->getFirst()->getUniqueID().getDevice() == DeviceID && 4107 I->getFirst()->getUniqueID().getFile() == FileID) { 4108 Loc = CGM.getContext().getSourceManager().translateFileLineCol( 4109 I->getFirst(), Line, 1); 4110 break; 4111 } 4112 } 4113 // Save this entry in the right position of the ordered entries array. 4114 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); 4115 ParentFunctions[E.getOrder()] = ParentName; 4116 4117 // Add metadata to the named metadata node. 4118 MD->addOperand(llvm::MDNode::get(C, Ops)); 4119 }; 4120 4121 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( 4122 TargetRegionMetadataEmitter); 4123 4124 // Create function that emits metadata for each device global variable entry; 4125 auto &&DeviceGlobalVarMetadataEmitter = 4126 [&C, &OrderedEntries, &GetMDInt, &GetMDString, 4127 MD](StringRef MangledName, 4128 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar 4129 &E) { 4130 // Generate metadata for global variables. Each entry of this metadata 4131 // contains: 4132 // - Entry 0 -> Kind of this type of metadata (1). 4133 // - Entry 1 -> Mangled name of the variable. 4134 // - Entry 2 -> Declare target kind. 
4135 // - Entry 3 -> Order the entry was created. 4136 // The first element of the metadata node is the kind. 4137 llvm::Metadata *Ops[] = { 4138 GetMDInt(E.getKind()), GetMDString(MangledName), 4139 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; 4140 4141 // Save this entry in the right position of the ordered entries array. 4142 OrderedEntries[E.getOrder()] = 4143 std::make_tuple(&E, SourceLocation(), MangledName); 4144 4145 // Add metadata to the named metadata node. 4146 MD->addOperand(llvm::MDNode::get(C, Ops)); 4147 }; 4148 4149 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( 4150 DeviceGlobalVarMetadataEmitter); 4151 4152 for (const auto &E : OrderedEntries) { 4153 assert(std::get<0>(E) && "All ordered entries must exist!"); 4154 if (const auto *CE = 4155 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( 4156 std::get<0>(E))) { 4157 if (!CE->getID() || !CE->getAddress()) { 4158 // Do not blame the entry if the parent funtion is not emitted. 4159 StringRef FnName = ParentFunctions[CE->getOrder()]; 4160 if (!CGM.GetGlobalValue(FnName)) 4161 continue; 4162 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4163 DiagnosticsEngine::Error, 4164 "Offloading entry for target region in %0 is incorrect: either the " 4165 "address or the ID is invalid."); 4166 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; 4167 continue; 4168 } 4169 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, 4170 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); 4171 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: 4172 OffloadEntryInfoDeviceGlobalVar>( 4173 std::get<0>(E))) { 4174 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = 4175 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4176 CE->getFlags()); 4177 switch (Flags) { 4178 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { 4179 if (CGM.getLangOpts().OpenMPIsDevice && 4180 
CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) 4181 continue; 4182 if (!CE->getAddress()) { 4183 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4184 DiagnosticsEngine::Error, "Offloading entry for declare target " 4185 "variable %0 is incorrect: the " 4186 "address is invalid."); 4187 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); 4188 continue; 4189 } 4190 // The vaiable has no definition - no need to add the entry. 4191 if (CE->getVarSize().isZero()) 4192 continue; 4193 break; 4194 } 4195 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: 4196 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || 4197 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && 4198 "Declaret target link address is set."); 4199 if (CGM.getLangOpts().OpenMPIsDevice) 4200 continue; 4201 if (!CE->getAddress()) { 4202 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4203 DiagnosticsEngine::Error, 4204 "Offloading entry for declare target variable is incorrect: the " 4205 "address is invalid."); 4206 CGM.getDiags().Report(DiagID); 4207 continue; 4208 } 4209 break; 4210 } 4211 createOffloadEntry(CE->getAddress(), CE->getAddress(), 4212 CE->getVarSize().getQuantity(), Flags, 4213 CE->getLinkage()); 4214 } else { 4215 llvm_unreachable("Unsupported entry kind."); 4216 } 4217 } 4218 } 4219 4220 /// Loads all the offload entries information from the host IR 4221 /// metadata. 4222 void CGOpenMPRuntime::loadOffloadInfoMetadata() { 4223 // If we are in target mode, load the metadata from the host IR. This code has 4224 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
4225 4226 if (!CGM.getLangOpts().OpenMPIsDevice) 4227 return; 4228 4229 if (CGM.getLangOpts().OMPHostIRFile.empty()) 4230 return; 4231 4232 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); 4233 if (auto EC = Buf.getError()) { 4234 CGM.getDiags().Report(diag::err_cannot_open_file) 4235 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4236 return; 4237 } 4238 4239 llvm::LLVMContext C; 4240 auto ME = expectedToErrorOrAndEmitErrors( 4241 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); 4242 4243 if (auto EC = ME.getError()) { 4244 unsigned DiagID = CGM.getDiags().getCustomDiagID( 4245 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); 4246 CGM.getDiags().Report(DiagID) 4247 << CGM.getLangOpts().OMPHostIRFile << EC.message(); 4248 return; 4249 } 4250 4251 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); 4252 if (!MD) 4253 return; 4254 4255 for (llvm::MDNode *MN : MD->operands()) { 4256 auto &&GetMDInt = [MN](unsigned Idx) { 4257 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); 4258 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); 4259 }; 4260 4261 auto &&GetMDString = [MN](unsigned Idx) { 4262 auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); 4263 return V->getString(); 4264 }; 4265 4266 switch (GetMDInt(0)) { 4267 default: 4268 llvm_unreachable("Unexpected metadata!"); 4269 break; 4270 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4271 OffloadingEntryInfoTargetRegion: 4272 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( 4273 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), 4274 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), 4275 /*Order=*/GetMDInt(5)); 4276 break; 4277 case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: 4278 OffloadingEntryInfoDeviceGlobalVar: 4279 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( 4280 /*MangledName=*/GetMDString(1), 4281 
static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( 4282 /*Flags=*/GetMDInt(2)), 4283 /*Order=*/GetMDInt(3)); 4284 break; 4285 } 4286 } 4287 } 4288 4289 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { 4290 if (!KmpRoutineEntryPtrTy) { 4291 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. 4292 ASTContext &C = CGM.getContext(); 4293 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; 4294 FunctionProtoType::ExtProtoInfo EPI; 4295 KmpRoutineEntryPtrQTy = C.getPointerType( 4296 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); 4297 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); 4298 } 4299 } 4300 4301 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { 4302 // Make sure the type of the entry is already created. This is the type we 4303 // have to create: 4304 // struct __tgt_offload_entry{ 4305 // void *addr; // Pointer to the offload entry info. 4306 // // (function or global) 4307 // char *name; // Name of the function or global. 4308 // size_t size; // Size of the entry info (0 if it a function). 4309 // int32_t flags; // Flags associated with the entry, e.g. 'link'. 4310 // int32_t reserved; // Reserved, to use by the runtime library. 
4311 // }; 4312 if (TgtOffloadEntryQTy.isNull()) { 4313 ASTContext &C = CGM.getContext(); 4314 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); 4315 RD->startDefinition(); 4316 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4317 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); 4318 addFieldToRecordDecl(C, RD, C.getSizeType()); 4319 addFieldToRecordDecl( 4320 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4321 addFieldToRecordDecl( 4322 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4323 RD->completeDefinition(); 4324 RD->addAttr(PackedAttr::CreateImplicit(C)); 4325 TgtOffloadEntryQTy = C.getRecordType(RD); 4326 } 4327 return TgtOffloadEntryQTy; 4328 } 4329 4330 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() { 4331 // These are the types we need to build: 4332 // struct __tgt_device_image{ 4333 // void *ImageStart; // Pointer to the target code start. 4334 // void *ImageEnd; // Pointer to the target code end. 4335 // // We also add the host entries to the device image, as it may be useful 4336 // // for the target runtime to have access to that information. 4337 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all 4338 // // the entries. 4339 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4340 // // entries (non inclusive). 
4341 // }; 4342 if (TgtDeviceImageQTy.isNull()) { 4343 ASTContext &C = CGM.getContext(); 4344 RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image"); 4345 RD->startDefinition(); 4346 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4347 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4348 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4349 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4350 RD->completeDefinition(); 4351 TgtDeviceImageQTy = C.getRecordType(RD); 4352 } 4353 return TgtDeviceImageQTy; 4354 } 4355 4356 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() { 4357 // struct __tgt_bin_desc{ 4358 // int32_t NumDevices; // Number of devices supported. 4359 // __tgt_device_image *DeviceImages; // Arrays of device images 4360 // // (one per device). 4361 // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the 4362 // // entries. 4363 // __tgt_offload_entry *EntriesEnd; // End of the table with all the 4364 // // entries (non inclusive). 
4365 // }; 4366 if (TgtBinaryDescriptorQTy.isNull()) { 4367 ASTContext &C = CGM.getContext(); 4368 RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc"); 4369 RD->startDefinition(); 4370 addFieldToRecordDecl( 4371 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); 4372 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtDeviceImageQTy())); 4373 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4374 addFieldToRecordDecl(C, RD, C.getPointerType(getTgtOffloadEntryQTy())); 4375 RD->completeDefinition(); 4376 TgtBinaryDescriptorQTy = C.getRecordType(RD); 4377 } 4378 return TgtBinaryDescriptorQTy; 4379 } 4380 4381 namespace { 4382 struct PrivateHelpersTy { 4383 PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy, 4384 const VarDecl *PrivateElemInit) 4385 : Original(Original), PrivateCopy(PrivateCopy), 4386 PrivateElemInit(PrivateElemInit) {} 4387 const VarDecl *Original; 4388 const VarDecl *PrivateCopy; 4389 const VarDecl *PrivateElemInit; 4390 }; 4391 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; 4392 } // anonymous namespace 4393 4394 static RecordDecl * 4395 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { 4396 if (!Privates.empty()) { 4397 ASTContext &C = CGM.getContext(); 4398 // Build struct .kmp_privates_t. 
{ 4399 // /* private vars */ 4400 // }; 4401 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); 4402 RD->startDefinition(); 4403 for (const auto &Pair : Privates) { 4404 const VarDecl *VD = Pair.second.Original; 4405 QualType Type = VD->getType().getNonReferenceType(); 4406 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); 4407 if (VD->hasAttrs()) { 4408 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), 4409 E(VD->getAttrs().end()); 4410 I != E; ++I) 4411 FD->addAttr(*I); 4412 } 4413 } 4414 RD->completeDefinition(); 4415 return RD; 4416 } 4417 return nullptr; 4418 } 4419 4420 static RecordDecl * 4421 createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, 4422 QualType KmpInt32Ty, 4423 QualType KmpRoutineEntryPointerQTy) { 4424 ASTContext &C = CGM.getContext(); 4425 // Build struct kmp_task_t { 4426 // void * shareds; 4427 // kmp_routine_entry_t routine; 4428 // kmp_int32 part_id; 4429 // kmp_cmplrdata_t data1; 4430 // kmp_cmplrdata_t data2; 4431 // For taskloops additional fields: 4432 // kmp_uint64 lb; 4433 // kmp_uint64 ub; 4434 // kmp_int64 st; 4435 // kmp_int32 liter; 4436 // void * reductions; 4437 // }; 4438 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); 4439 UD->startDefinition(); 4440 addFieldToRecordDecl(C, UD, KmpInt32Ty); 4441 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); 4442 UD->completeDefinition(); 4443 QualType KmpCmplrdataTy = C.getRecordType(UD); 4444 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); 4445 RD->startDefinition(); 4446 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4447 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); 4448 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4449 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4450 addFieldToRecordDecl(C, RD, KmpCmplrdataTy); 4451 if (isOpenMPTaskLoopDirective(Kind)) { 4452 QualType KmpUInt64Ty = 4453 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); 4454 QualType KmpInt64Ty = 4455 
CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 4456 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4457 addFieldToRecordDecl(C, RD, KmpUInt64Ty); 4458 addFieldToRecordDecl(C, RD, KmpInt64Ty); 4459 addFieldToRecordDecl(C, RD, KmpInt32Ty); 4460 addFieldToRecordDecl(C, RD, C.VoidPtrTy); 4461 } 4462 RD->completeDefinition(); 4463 return RD; 4464 } 4465 4466 static RecordDecl * 4467 createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, 4468 ArrayRef<PrivateDataTy> Privates) { 4469 ASTContext &C = CGM.getContext(); 4470 // Build struct kmp_task_t_with_privates { 4471 // kmp_task_t task_data; 4472 // .kmp_privates_t. privates; 4473 // }; 4474 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); 4475 RD->startDefinition(); 4476 addFieldToRecordDecl(C, RD, KmpTaskTQTy); 4477 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) 4478 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); 4479 RD->completeDefinition(); 4480 return RD; 4481 } 4482 4483 /// Emit a proxy function which accepts kmp_task_t as the second 4484 /// argument. 
4485 /// \code 4486 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { 4487 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, 4488 /// For taskloops: 4489 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4490 /// tt->reductions, tt->shareds); 4491 /// return 0; 4492 /// } 4493 /// \endcode 4494 static llvm::Function * 4495 emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, 4496 OpenMPDirectiveKind Kind, QualType KmpInt32Ty, 4497 QualType KmpTaskTWithPrivatesPtrQTy, 4498 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, 4499 QualType SharedsPtrTy, llvm::Function *TaskFunction, 4500 llvm::Value *TaskPrivatesMap) { 4501 ASTContext &C = CGM.getContext(); 4502 FunctionArgList Args; 4503 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4504 ImplicitParamDecl::Other); 4505 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4506 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4507 ImplicitParamDecl::Other); 4508 Args.push_back(&GtidArg); 4509 Args.push_back(&TaskTypeArg); 4510 const auto &TaskEntryFnInfo = 4511 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4512 llvm::FunctionType *TaskEntryTy = 4513 CGM.getTypes().GetFunctionType(TaskEntryFnInfo); 4514 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); 4515 auto *TaskEntry = llvm::Function::Create( 4516 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4517 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); 4518 TaskEntry->setDoesNotRecurse(); 4519 CodeGenFunction CGF(CGM); 4520 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, 4521 Loc, Loc); 4522 4523 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, 4524 // tt, 4525 // For taskloops: 4526 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, 4527 // 
tt->task_data.shareds); 4528 llvm::Value *GtidParam = CGF.EmitLoadOfScalar( 4529 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); 4530 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4531 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4532 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4533 const auto *KmpTaskTWithPrivatesQTyRD = 4534 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4535 LValue Base = 4536 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4537 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 4538 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 4539 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); 4540 llvm::Value *PartidParam = PartIdLVal.getPointer(); 4541 4542 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); 4543 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); 4544 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4545 CGF.EmitLoadOfScalar(SharedsLVal, Loc), 4546 CGF.ConvertTypeForMem(SharedsPtrTy)); 4547 4548 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4549 llvm::Value *PrivatesParam; 4550 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { 4551 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); 4552 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4553 PrivatesLVal.getPointer(), CGF.VoidPtrTy); 4554 } else { 4555 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 4556 } 4557 4558 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, 4559 TaskPrivatesMap, 4560 CGF.Builder 4561 .CreatePointerBitCastOrAddrSpaceCast( 4562 TDBase.getAddress(), CGF.VoidPtrTy) 4563 .getPointer()}; 4564 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), 4565 std::end(CommonArgs)); 4566 if (isOpenMPTaskLoopDirective(Kind)) { 4567 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), 
KmpTaskTLowerBound); 4568 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); 4569 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); 4570 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); 4571 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); 4572 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); 4573 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); 4574 LValue StLVal = CGF.EmitLValueForField(Base, *StFI); 4575 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); 4576 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4577 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4578 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); 4579 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); 4580 LValue RLVal = CGF.EmitLValueForField(Base, *RFI); 4581 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); 4582 CallArgs.push_back(LBParam); 4583 CallArgs.push_back(UBParam); 4584 CallArgs.push_back(StParam); 4585 CallArgs.push_back(LIParam); 4586 CallArgs.push_back(RParam); 4587 } 4588 CallArgs.push_back(SharedsParam); 4589 4590 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, 4591 CallArgs); 4592 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), 4593 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); 4594 CGF.FinishFunction(); 4595 return TaskEntry; 4596 } 4597 4598 static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, 4599 SourceLocation Loc, 4600 QualType KmpInt32Ty, 4601 QualType KmpTaskTWithPrivatesPtrQTy, 4602 QualType KmpTaskTWithPrivatesQTy) { 4603 ASTContext &C = CGM.getContext(); 4604 FunctionArgList Args; 4605 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, 4606 ImplicitParamDecl::Other); 4607 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4608 KmpTaskTWithPrivatesPtrQTy.withRestrict(), 4609 ImplicitParamDecl::Other); 4610 
Args.push_back(&GtidArg); 4611 Args.push_back(&TaskTypeArg); 4612 const auto &DestructorFnInfo = 4613 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); 4614 llvm::FunctionType *DestructorFnTy = 4615 CGM.getTypes().GetFunctionType(DestructorFnInfo); 4616 std::string Name = 4617 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); 4618 auto *DestructorFn = 4619 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, 4620 Name, &CGM.getModule()); 4621 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, 4622 DestructorFnInfo); 4623 DestructorFn->setDoesNotRecurse(); 4624 CodeGenFunction CGF(CGM); 4625 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, 4626 Args, Loc, Loc); 4627 4628 LValue Base = CGF.EmitLoadOfPointerLValue( 4629 CGF.GetAddrOfLocalVar(&TaskTypeArg), 4630 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4631 const auto *KmpTaskTWithPrivatesQTyRD = 4632 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); 4633 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4634 Base = CGF.EmitLValueForField(Base, *FI); 4635 for (const auto *Field : 4636 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { 4637 if (QualType::DestructionKind DtorKind = 4638 Field->getType().isDestructedType()) { 4639 LValue FieldLValue = CGF.EmitLValueForField(Base, Field); 4640 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); 4641 } 4642 } 4643 CGF.FinishFunction(); 4644 return DestructorFn; 4645 } 4646 4647 /// Emit a privates mapping function for correct handling of private and 4648 /// firstprivate variables. 4649 /// \code 4650 /// void .omp_task_privates_map.(const .privates. 
*noalias privs, <ty1> 4651 /// **noalias priv1,..., <tyn> **noalias privn) { 4652 /// *priv1 = &.privates.priv1; 4653 /// ...; 4654 /// *privn = &.privates.privn; 4655 /// } 4656 /// \endcode 4657 static llvm::Value * 4658 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, 4659 ArrayRef<const Expr *> PrivateVars, 4660 ArrayRef<const Expr *> FirstprivateVars, 4661 ArrayRef<const Expr *> LastprivateVars, 4662 QualType PrivatesQTy, 4663 ArrayRef<PrivateDataTy> Privates) { 4664 ASTContext &C = CGM.getContext(); 4665 FunctionArgList Args; 4666 ImplicitParamDecl TaskPrivatesArg( 4667 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4668 C.getPointerType(PrivatesQTy).withConst().withRestrict(), 4669 ImplicitParamDecl::Other); 4670 Args.push_back(&TaskPrivatesArg); 4671 llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos; 4672 unsigned Counter = 1; 4673 for (const Expr *E : PrivateVars) { 4674 Args.push_back(ImplicitParamDecl::Create( 4675 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4676 C.getPointerType(C.getPointerType(E->getType())) 4677 .withConst() 4678 .withRestrict(), 4679 ImplicitParamDecl::Other)); 4680 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4681 PrivateVarsPos[VD] = Counter; 4682 ++Counter; 4683 } 4684 for (const Expr *E : FirstprivateVars) { 4685 Args.push_back(ImplicitParamDecl::Create( 4686 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4687 C.getPointerType(C.getPointerType(E->getType())) 4688 .withConst() 4689 .withRestrict(), 4690 ImplicitParamDecl::Other)); 4691 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4692 PrivateVarsPos[VD] = Counter; 4693 ++Counter; 4694 } 4695 for (const Expr *E : LastprivateVars) { 4696 Args.push_back(ImplicitParamDecl::Create( 4697 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4698 C.getPointerType(C.getPointerType(E->getType())) 4699 .withConst() 4700 .withRestrict(), 4701 ImplicitParamDecl::Other)); 4702 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4703 
PrivateVarsPos[VD] = Counter; 4704 ++Counter; 4705 } 4706 const auto &TaskPrivatesMapFnInfo = 4707 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4708 llvm::FunctionType *TaskPrivatesMapTy = 4709 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); 4710 std::string Name = 4711 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); 4712 auto *TaskPrivatesMap = llvm::Function::Create( 4713 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, 4714 &CGM.getModule()); 4715 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, 4716 TaskPrivatesMapFnInfo); 4717 if (CGM.getLangOpts().Optimize) { 4718 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); 4719 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); 4720 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); 4721 } 4722 CodeGenFunction CGF(CGM); 4723 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, 4724 TaskPrivatesMapFnInfo, Args, Loc, Loc); 4725 4726 // *privi = &.privates.privi; 4727 LValue Base = CGF.EmitLoadOfPointerLValue( 4728 CGF.GetAddrOfLocalVar(&TaskPrivatesArg), 4729 TaskPrivatesArg.getType()->castAs<PointerType>()); 4730 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); 4731 Counter = 0; 4732 for (const FieldDecl *Field : PrivatesQTyRD->fields()) { 4733 LValue FieldLVal = CGF.EmitLValueForField(Base, Field); 4734 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; 4735 LValue RefLVal = 4736 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); 4737 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( 4738 RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 4739 CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); 4740 ++Counter; 4741 } 4742 CGF.FinishFunction(); 4743 return TaskPrivatesMap; 4744 } 4745 4746 /// Emit initialization for private variables in task-based directives. 
4747 static void emitPrivatesInit(CodeGenFunction &CGF, 4748 const OMPExecutableDirective &D, 4749 Address KmpTaskSharedsPtr, LValue TDBase, 4750 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4751 QualType SharedsTy, QualType SharedsPtrTy, 4752 const OMPTaskDataTy &Data, 4753 ArrayRef<PrivateDataTy> Privates, bool ForDup) { 4754 ASTContext &C = CGF.getContext(); 4755 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 4756 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); 4757 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) 4758 ? OMPD_taskloop 4759 : OMPD_task; 4760 const CapturedStmt &CS = *D.getCapturedStmt(Kind); 4761 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); 4762 LValue SrcBase; 4763 bool IsTargetTask = 4764 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || 4765 isOpenMPTargetExecutionDirective(D.getDirectiveKind()); 4766 // For target-based directives skip 3 firstprivate arrays BasePointersArray, 4767 // PointersArray and SizesArray. The original variables for these arrays are 4768 // not captured and we get their addresses explicitly. 
4769 if ((!IsTargetTask && !Data.FirstprivateVars.empty()) || 4770 (IsTargetTask && KmpTaskSharedsPtr.isValid())) { 4771 SrcBase = CGF.MakeAddrLValue( 4772 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 4773 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), 4774 SharedsTy); 4775 } 4776 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); 4777 for (const PrivateDataTy &Pair : Privates) { 4778 const VarDecl *VD = Pair.second.PrivateCopy; 4779 const Expr *Init = VD->getAnyInitializer(); 4780 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && 4781 !CGF.isTrivialInitializer(Init)))) { 4782 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); 4783 if (const VarDecl *Elem = Pair.second.PrivateElemInit) { 4784 const VarDecl *OriginalVD = Pair.second.Original; 4785 // Check if the variable is the target-based BasePointersArray, 4786 // PointersArray or SizesArray. 4787 LValue SharedRefLValue; 4788 QualType Type = PrivateLValue.getType(); 4789 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); 4790 if (IsTargetTask && !SharedField) { 4791 assert(isa<ImplicitParamDecl>(OriginalVD) && 4792 isa<CapturedDecl>(OriginalVD->getDeclContext()) && 4793 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4794 ->getNumParams() == 0 && 4795 isa<TranslationUnitDecl>( 4796 cast<CapturedDecl>(OriginalVD->getDeclContext()) 4797 ->getDeclContext()) && 4798 "Expected artificial target data variable."); 4799 SharedRefLValue = 4800 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); 4801 } else { 4802 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); 4803 SharedRefLValue = CGF.MakeAddrLValue( 4804 Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)), 4805 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), 4806 SharedRefLValue.getTBAAInfo()); 4807 } 4808 if (Type->isArrayType()) { 4809 // Initialize firstprivate array. 
4810 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { 4811 // Perform simple memcpy. 4812 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); 4813 } else { 4814 // Initialize firstprivate array using element-by-element 4815 // initialization. 4816 CGF.EmitOMPAggregateAssign( 4817 PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, 4818 [&CGF, Elem, Init, &CapturesInfo](Address DestElement, 4819 Address SrcElement) { 4820 // Clean up any temporaries needed by the initialization. 4821 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4822 InitScope.addPrivate( 4823 Elem, [SrcElement]() -> Address { return SrcElement; }); 4824 (void)InitScope.Privatize(); 4825 // Emit initialization for single element. 4826 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( 4827 CGF, &CapturesInfo); 4828 CGF.EmitAnyExprToMem(Init, DestElement, 4829 Init->getType().getQualifiers(), 4830 /*IsInitializer=*/false); 4831 }); 4832 } 4833 } else { 4834 CodeGenFunction::OMPPrivateScope InitScope(CGF); 4835 InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address { 4836 return SharedRefLValue.getAddress(); 4837 }); 4838 (void)InitScope.Privatize(); 4839 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); 4840 CGF.EmitExprAsInit(Init, VD, PrivateLValue, 4841 /*capturedByInit=*/false); 4842 } 4843 } else { 4844 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); 4845 } 4846 } 4847 ++FI; 4848 } 4849 } 4850 4851 /// Check if duplication function is required for taskloops. 
4852 static bool checkInitIsRequired(CodeGenFunction &CGF, 4853 ArrayRef<PrivateDataTy> Privates) { 4854 bool InitRequired = false; 4855 for (const PrivateDataTy &Pair : Privates) { 4856 const VarDecl *VD = Pair.second.PrivateCopy; 4857 const Expr *Init = VD->getAnyInitializer(); 4858 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && 4859 !CGF.isTrivialInitializer(Init)); 4860 if (InitRequired) 4861 break; 4862 } 4863 return InitRequired; 4864 } 4865 4866 4867 /// Emit task_dup function (for initialization of 4868 /// private/firstprivate/lastprivate vars and last_iter flag) 4869 /// \code 4870 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int 4871 /// lastpriv) { 4872 /// // setup lastprivate flag 4873 /// task_dst->last = lastpriv; 4874 /// // could be constructor calls here... 4875 /// } 4876 /// \endcode 4877 static llvm::Value * 4878 emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, 4879 const OMPExecutableDirective &D, 4880 QualType KmpTaskTWithPrivatesPtrQTy, 4881 const RecordDecl *KmpTaskTWithPrivatesQTyRD, 4882 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, 4883 QualType SharedsPtrTy, const OMPTaskDataTy &Data, 4884 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { 4885 ASTContext &C = CGM.getContext(); 4886 FunctionArgList Args; 4887 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4888 KmpTaskTWithPrivatesPtrQTy, 4889 ImplicitParamDecl::Other); 4890 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 4891 KmpTaskTWithPrivatesPtrQTy, 4892 ImplicitParamDecl::Other); 4893 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, 4894 ImplicitParamDecl::Other); 4895 Args.push_back(&DstArg); 4896 Args.push_back(&SrcArg); 4897 Args.push_back(&LastprivArg); 4898 const auto &TaskDupFnInfo = 4899 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 4900 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); 
4901 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); 4902 auto *TaskDup = llvm::Function::Create( 4903 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); 4904 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); 4905 TaskDup->setDoesNotRecurse(); 4906 CodeGenFunction CGF(CGM); 4907 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, 4908 Loc); 4909 4910 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4911 CGF.GetAddrOfLocalVar(&DstArg), 4912 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4913 // task_dst->liter = lastpriv; 4914 if (WithLastIter) { 4915 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); 4916 LValue Base = CGF.EmitLValueForField( 4917 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4918 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); 4919 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( 4920 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); 4921 CGF.EmitStoreOfScalar(Lastpriv, LILVal); 4922 } 4923 4924 // Emit initial values for private copies (if any). 4925 assert(!Privates.empty()); 4926 Address KmpTaskSharedsPtr = Address::invalid(); 4927 if (!Data.FirstprivateVars.empty()) { 4928 LValue TDBase = CGF.EmitLoadOfPointerLValue( 4929 CGF.GetAddrOfLocalVar(&SrcArg), 4930 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); 4931 LValue Base = CGF.EmitLValueForField( 4932 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); 4933 KmpTaskSharedsPtr = Address( 4934 CGF.EmitLoadOfScalar(CGF.EmitLValueForField( 4935 Base, *std::next(KmpTaskTQTyRD->field_begin(), 4936 KmpTaskTShareds)), 4937 Loc), 4938 CGF.getNaturalTypeAlignment(SharedsTy)); 4939 } 4940 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, 4941 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); 4942 CGF.FinishFunction(); 4943 return TaskDup; 4944 } 4945 4946 /// Checks if destructor function is required to be generated. 
4947 /// \return true if cleanups are required, false otherwise. 4948 static bool 4949 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) { 4950 bool NeedsCleanup = false; 4951 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); 4952 const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl()); 4953 for (const FieldDecl *FD : PrivateRD->fields()) { 4954 NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType(); 4955 if (NeedsCleanup) 4956 break; 4957 } 4958 return NeedsCleanup; 4959 } 4960 4961 CGOpenMPRuntime::TaskResultTy 4962 CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, 4963 const OMPExecutableDirective &D, 4964 llvm::Function *TaskFunction, QualType SharedsTy, 4965 Address Shareds, const OMPTaskDataTy &Data) { 4966 ASTContext &C = CGM.getContext(); 4967 llvm::SmallVector<PrivateDataTy, 4> Privates; 4968 // Aggregate privates and sort them by the alignment. 4969 auto I = Data.PrivateCopies.begin(); 4970 for (const Expr *E : Data.PrivateVars) { 4971 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4972 Privates.emplace_back( 4973 C.getDeclAlign(VD), 4974 PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4975 /*PrivateElemInit=*/nullptr)); 4976 ++I; 4977 } 4978 I = Data.FirstprivateCopies.begin(); 4979 auto IElemInitRef = Data.FirstprivateInits.begin(); 4980 for (const Expr *E : Data.FirstprivateVars) { 4981 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4982 Privates.emplace_back( 4983 C.getDeclAlign(VD), 4984 PrivateHelpersTy( 4985 VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4986 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); 4987 ++I; 4988 ++IElemInitRef; 4989 } 4990 I = Data.LastprivateCopies.begin(); 4991 for (const Expr *E : Data.LastprivateVars) { 4992 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4993 Privates.emplace_back( 4994 C.getDeclAlign(VD), 4995 PrivateHelpersTy(VD, 
cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), 4996 /*PrivateElemInit=*/nullptr)); 4997 ++I; 4998 } 4999 llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) { 5000 return L.first > R.first; 5001 }); 5002 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); 5003 // Build type kmp_routine_entry_t (if not built yet). 5004 emitKmpRoutineEntryT(KmpInt32Ty); 5005 // Build type kmp_task_t (if not built yet). 5006 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { 5007 if (SavedKmpTaskloopTQTy.isNull()) { 5008 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5009 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5010 } 5011 KmpTaskTQTy = SavedKmpTaskloopTQTy; 5012 } else { 5013 assert((D.getDirectiveKind() == OMPD_task || 5014 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || 5015 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && 5016 "Expected taskloop, task or target directive"); 5017 if (SavedKmpTaskTQTy.isNull()) { 5018 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( 5019 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); 5020 } 5021 KmpTaskTQTy = SavedKmpTaskTQTy; 5022 } 5023 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); 5024 // Build particular struct kmp_task_t for the given task. 
5025 const RecordDecl *KmpTaskTWithPrivatesQTyRD = 5026 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); 5027 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); 5028 QualType KmpTaskTWithPrivatesPtrQTy = 5029 C.getPointerType(KmpTaskTWithPrivatesQTy); 5030 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); 5031 llvm::Type *KmpTaskTWithPrivatesPtrTy = 5032 KmpTaskTWithPrivatesTy->getPointerTo(); 5033 llvm::Value *KmpTaskTWithPrivatesTySize = 5034 CGF.getTypeSize(KmpTaskTWithPrivatesQTy); 5035 QualType SharedsPtrTy = C.getPointerType(SharedsTy); 5036 5037 // Emit initial values for private copies (if any). 5038 llvm::Value *TaskPrivatesMap = nullptr; 5039 llvm::Type *TaskPrivatesMapTy = 5040 std::next(TaskFunction->arg_begin(), 3)->getType(); 5041 if (!Privates.empty()) { 5042 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); 5043 TaskPrivatesMap = emitTaskPrivateMappingFunction( 5044 CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars, 5045 FI->getType(), Privates); 5046 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5047 TaskPrivatesMap, TaskPrivatesMapTy); 5048 } else { 5049 TaskPrivatesMap = llvm::ConstantPointerNull::get( 5050 cast<llvm::PointerType>(TaskPrivatesMapTy)); 5051 } 5052 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, 5053 // kmp_task_t *tt); 5054 llvm::Function *TaskEntry = emitProxyTaskFunction( 5055 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5056 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, 5057 TaskPrivatesMap); 5058 5059 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, 5060 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, 5061 // kmp_routine_entry_t *task_entry); 5062 // Task flags. 
Format is taken from 5063 // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, 5064 // description of kmp_tasking_flags struct. 5065 enum { 5066 TiedFlag = 0x1, 5067 FinalFlag = 0x2, 5068 DestructorsFlag = 0x8, 5069 PriorityFlag = 0x20 5070 }; 5071 unsigned Flags = Data.Tied ? TiedFlag : 0; 5072 bool NeedsCleanup = false; 5073 if (!Privates.empty()) { 5074 NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD); 5075 if (NeedsCleanup) 5076 Flags = Flags | DestructorsFlag; 5077 } 5078 if (Data.Priority.getInt()) 5079 Flags = Flags | PriorityFlag; 5080 llvm::Value *TaskFlags = 5081 Data.Final.getPointer() 5082 ? CGF.Builder.CreateSelect(Data.Final.getPointer(), 5083 CGF.Builder.getInt32(FinalFlag), 5084 CGF.Builder.getInt32(/*C=*/0)) 5085 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); 5086 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); 5087 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); 5088 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), 5089 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, 5090 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5091 TaskEntry, KmpRoutineEntryPtrTy)}; 5092 llvm::Value *NewTask; 5093 if (D.hasClausesOfKind<OMPNowaitClause>()) { 5094 // Check if we have any device clause associated with the directive. 5095 const Expr *Device = nullptr; 5096 if (auto *C = D.getSingleClause<OMPDeviceClause>()) 5097 Device = C->getDevice(); 5098 // Emit device ID if any otherwise use default value. 
5099 llvm::Value *DeviceID; 5100 if (Device) 5101 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 5102 CGF.Int64Ty, /*isSigned=*/true); 5103 else 5104 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 5105 AllocArgs.push_back(DeviceID); 5106 NewTask = CGF.EmitRuntimeCall( 5107 createRuntimeFunction(OMPRTL__kmpc_omp_target_task_alloc), AllocArgs); 5108 } else { 5109 NewTask = CGF.EmitRuntimeCall( 5110 createRuntimeFunction(OMPRTL__kmpc_omp_task_alloc), AllocArgs); 5111 } 5112 llvm::Value *NewTaskNewTaskTTy = 5113 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5114 NewTask, KmpTaskTWithPrivatesPtrTy); 5115 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, 5116 KmpTaskTWithPrivatesQTy); 5117 LValue TDBase = 5118 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); 5119 // Fill the data in the resulting kmp_task_t record. 5120 // Copy shareds if there are any. 5121 Address KmpTaskSharedsPtr = Address::invalid(); 5122 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { 5123 KmpTaskSharedsPtr = 5124 Address(CGF.EmitLoadOfScalar( 5125 CGF.EmitLValueForField( 5126 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), 5127 KmpTaskTShareds)), 5128 Loc), 5129 CGF.getNaturalTypeAlignment(SharedsTy)); 5130 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); 5131 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); 5132 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); 5133 } 5134 // Emit initial values for private copies (if any). 
5135 TaskResultTy Result; 5136 if (!Privates.empty()) { 5137 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, 5138 SharedsTy, SharedsPtrTy, Data, Privates, 5139 /*ForDup=*/false); 5140 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && 5141 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { 5142 Result.TaskDupFn = emitTaskDupFunction( 5143 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, 5144 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, 5145 /*WithLastIter=*/!Data.LastprivateVars.empty()); 5146 } 5147 } 5148 // Fields of union "kmp_cmplrdata_t" for destructors and priority. 5149 enum { Priority = 0, Destructors = 1 }; 5150 // Provide pointer to function with destructors for privates. 5151 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); 5152 const RecordDecl *KmpCmplrdataUD = 5153 (*FI)->getType()->getAsUnionType()->getDecl(); 5154 if (NeedsCleanup) { 5155 llvm::Value *DestructorFn = emitDestructorsFunction( 5156 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, 5157 KmpTaskTWithPrivatesQTy); 5158 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); 5159 LValue DestructorsLV = CGF.EmitLValueForField( 5160 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); 5161 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5162 DestructorFn, KmpRoutineEntryPtrTy), 5163 DestructorsLV); 5164 } 5165 // Set priority. 
5166 if (Data.Priority.getInt()) { 5167 LValue Data2LV = CGF.EmitLValueForField( 5168 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); 5169 LValue PriorityLV = CGF.EmitLValueForField( 5170 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); 5171 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); 5172 } 5173 Result.NewTask = NewTask; 5174 Result.TaskEntry = TaskEntry; 5175 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; 5176 Result.TDBase = TDBase; 5177 Result.KmpTaskTQTyRD = KmpTaskTQTyRD; 5178 return Result; 5179 } 5180 5181 void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 5182 const OMPExecutableDirective &D, 5183 llvm::Function *TaskFunction, 5184 QualType SharedsTy, Address Shareds, 5185 const Expr *IfCond, 5186 const OMPTaskDataTy &Data) { 5187 if (!CGF.HaveInsertPoint()) 5188 return; 5189 5190 TaskResultTy Result = 5191 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5192 llvm::Value *NewTask = Result.NewTask; 5193 llvm::Function *TaskEntry = Result.TaskEntry; 5194 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; 5195 LValue TDBase = Result.TDBase; 5196 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; 5197 ASTContext &C = CGM.getContext(); 5198 // Process list of dependences. 5199 Address DependenciesArray = Address::invalid(); 5200 unsigned NumDependencies = Data.Dependences.size(); 5201 if (NumDependencies) { 5202 // Dependence kind for RTL. 
5203 enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 }; 5204 enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; 5205 RecordDecl *KmpDependInfoRD; 5206 QualType FlagsTy = 5207 C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); 5208 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); 5209 if (KmpDependInfoTy.isNull()) { 5210 KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); 5211 KmpDependInfoRD->startDefinition(); 5212 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); 5213 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); 5214 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); 5215 KmpDependInfoRD->completeDefinition(); 5216 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); 5217 } else { 5218 KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); 5219 } 5220 // Define type kmp_depend_info[<Dependences.size()>]; 5221 QualType KmpDependInfoArrayTy = C.getConstantArrayType( 5222 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), 5223 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 5224 // kmp_depend_info[<Dependences.size()>] deps; 5225 DependenciesArray = 5226 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); 5227 for (unsigned I = 0; I < NumDependencies; ++I) { 5228 const Expr *E = Data.Dependences[I].second; 5229 LValue Addr = CGF.EmitLValue(E); 5230 llvm::Value *Size; 5231 QualType Ty = E->getType(); 5232 if (const auto *ASE = 5233 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { 5234 LValue UpAddrLVal = 5235 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); 5236 llvm::Value *UpAddr = 5237 CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1); 5238 llvm::Value *LowIntPtr = 5239 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy); 5240 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy); 5241 Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); 5242 } else { 5243 Size = 
CGF.getTypeSize(Ty); 5244 } 5245 LValue Base = CGF.MakeAddrLValue( 5246 CGF.Builder.CreateConstArrayGEP(DependenciesArray, I), 5247 KmpDependInfoTy); 5248 // deps[i].base_addr = &<Dependences[i].second>; 5249 LValue BaseAddrLVal = CGF.EmitLValueForField( 5250 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); 5251 CGF.EmitStoreOfScalar( 5252 CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy), 5253 BaseAddrLVal); 5254 // deps[i].len = sizeof(<Dependences[i].second>); 5255 LValue LenLVal = CGF.EmitLValueForField( 5256 Base, *std::next(KmpDependInfoRD->field_begin(), Len)); 5257 CGF.EmitStoreOfScalar(Size, LenLVal); 5258 // deps[i].flags = <Dependences[i].first>; 5259 RTLDependenceKindTy DepKind; 5260 switch (Data.Dependences[I].first) { 5261 case OMPC_DEPEND_in: 5262 DepKind = DepIn; 5263 break; 5264 // Out and InOut dependencies must use the same code. 5265 case OMPC_DEPEND_out: 5266 case OMPC_DEPEND_inout: 5267 DepKind = DepInOut; 5268 break; 5269 case OMPC_DEPEND_mutexinoutset: 5270 DepKind = DepMutexInOutSet; 5271 break; 5272 case OMPC_DEPEND_source: 5273 case OMPC_DEPEND_sink: 5274 case OMPC_DEPEND_unknown: 5275 llvm_unreachable("Unknown task dependence type"); 5276 } 5277 LValue FlagsLVal = CGF.EmitLValueForField( 5278 Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); 5279 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), 5280 FlagsLVal); 5281 } 5282 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5283 CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy); 5284 } 5285 5286 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5287 // libcall. 
5288 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, 5289 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, 5290 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence 5291 // list is not empty 5292 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5293 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5294 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; 5295 llvm::Value *DepTaskArgs[7]; 5296 if (NumDependencies) { 5297 DepTaskArgs[0] = UpLoc; 5298 DepTaskArgs[1] = ThreadID; 5299 DepTaskArgs[2] = NewTask; 5300 DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies); 5301 DepTaskArgs[4] = DependenciesArray.getPointer(); 5302 DepTaskArgs[5] = CGF.Builder.getInt32(0); 5303 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5304 } 5305 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies, 5306 &TaskArgs, 5307 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { 5308 if (!Data.Tied) { 5309 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); 5310 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); 5311 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); 5312 } 5313 if (NumDependencies) { 5314 CGF.EmitRuntimeCall( 5315 createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); 5316 } else { 5317 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), 5318 TaskArgs); 5319 } 5320 // Check if parent region is untied and build return for untied task; 5321 if (auto *Region = 5322 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) 5323 Region->emitUntiedSwitch(CGF); 5324 }; 5325 5326 llvm::Value *DepWaitTaskArgs[6]; 5327 if (NumDependencies) { 5328 DepWaitTaskArgs[0] = UpLoc; 5329 DepWaitTaskArgs[1] = ThreadID; 5330 DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies); 5331 DepWaitTaskArgs[3] = DependenciesArray.getPointer(); 5332 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); 5333 
DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); 5334 } 5335 auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, 5336 NumDependencies, &DepWaitTaskArgs, 5337 Loc](CodeGenFunction &CGF, PrePostActionTy &) { 5338 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); 5339 CodeGenFunction::RunCleanupsScope LocalScope(CGF); 5340 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, 5341 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 5342 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info 5343 // is specified. 5344 if (NumDependencies) 5345 CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), 5346 DepWaitTaskArgs); 5347 // Call proxy_task_entry(gtid, new_task); 5348 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, 5349 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { 5350 Action.Enter(CGF); 5351 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; 5352 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, 5353 OutlinedFnArgs); 5354 }; 5355 5356 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, 5357 // kmp_task_t *new_task); 5358 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, 5359 // kmp_task_t *new_task); 5360 RegionCodeGenTy RCG(CodeGen); 5361 CommonActionTy Action( 5362 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, 5363 RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); 5364 RCG.setAction(Action); 5365 RCG(CGF); 5366 }; 5367 5368 if (IfCond) { 5369 emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); 5370 } else { 5371 RegionCodeGenTy ThenRCG(ThenCodeGen); 5372 ThenRCG(CGF); 5373 } 5374 } 5375 5376 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, 5377 const OMPLoopDirective &D, 5378 llvm::Function *TaskFunction, 5379 QualType SharedsTy, Address Shareds, 5380 const Expr *IfCond, 5381 const OMPTaskDataTy &Data) 
{ 5382 if (!CGF.HaveInsertPoint()) 5383 return; 5384 TaskResultTy Result = 5385 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); 5386 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() 5387 // libcall. 5388 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int 5389 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int 5390 // sched, kmp_uint64 grainsize, void *task_dup); 5391 llvm::Value *ThreadID = getThreadID(CGF, Loc); 5392 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); 5393 llvm::Value *IfVal; 5394 if (IfCond) { 5395 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, 5396 /*isSigned=*/true); 5397 } else { 5398 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); 5399 } 5400 5401 LValue LBLVal = CGF.EmitLValueForField( 5402 Result.TDBase, 5403 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); 5404 const auto *LBVar = 5405 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); 5406 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), 5407 /*IsInitializer=*/true); 5408 LValue UBLVal = CGF.EmitLValueForField( 5409 Result.TDBase, 5410 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); 5411 const auto *UBVar = 5412 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); 5413 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), 5414 /*IsInitializer=*/true); 5415 LValue StLVal = CGF.EmitLValueForField( 5416 Result.TDBase, 5417 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); 5418 const auto *StVar = 5419 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); 5420 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), 5421 /*IsInitializer=*/true); 5422 // Store reductions address. 
5423 LValue RedLVal = CGF.EmitLValueForField( 5424 Result.TDBase, 5425 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); 5426 if (Data.Reductions) { 5427 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); 5428 } else { 5429 CGF.EmitNullInitialization(RedLVal.getAddress(), 5430 CGF.getContext().VoidPtrTy); 5431 } 5432 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; 5433 llvm::Value *TaskArgs[] = { 5434 UpLoc, 5435 ThreadID, 5436 Result.NewTask, 5437 IfVal, 5438 LBLVal.getPointer(), 5439 UBLVal.getPointer(), 5440 CGF.EmitLoadOfScalar(StLVal, Loc), 5441 llvm::ConstantInt::getSigned( 5442 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler 5443 llvm::ConstantInt::getSigned( 5444 CGF.IntTy, Data.Schedule.getPointer() 5445 ? Data.Schedule.getInt() ? NumTasks : Grainsize 5446 : NoSchedule), 5447 Data.Schedule.getPointer() 5448 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, 5449 /*isSigned=*/false) 5450 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), 5451 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 5452 Result.TaskDupFn, CGF.VoidPtrTy) 5453 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; 5454 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); 5455 } 5456 5457 /// Emit reduction operation for each element of array (required for 5458 /// array sections) LHS op = RHS. 5459 /// \param Type Type of array. 5460 /// \param LHSVar Variable on the left side of the reduction operation 5461 /// (references element of array in original variable). 5462 /// \param RHSVar Variable on the right side of the reduction operation 5463 /// (references element of array in original variable). 5464 /// \param RedOpGen Generator of reduction operation with use of LHSVar and 5465 /// RHSVar. 
5466 static void EmitOMPAggregateReduction( 5467 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, 5468 const VarDecl *RHSVar, 5469 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, 5470 const Expr *, const Expr *)> &RedOpGen, 5471 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, 5472 const Expr *UpExpr = nullptr) { 5473 // Perform element-by-element initialization. 5474 QualType ElementTy; 5475 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); 5476 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); 5477 5478 // Drill down to the base element type on both arrays. 5479 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); 5480 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); 5481 5482 llvm::Value *RHSBegin = RHSAddr.getPointer(); 5483 llvm::Value *LHSBegin = LHSAddr.getPointer(); 5484 // Cast from pointer to array type to pointer to single element. 5485 llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); 5486 // The basic structure here is a while-do loop. 5487 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); 5488 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); 5489 llvm::Value *IsEmpty = 5490 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); 5491 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 5492 5493 // Enter the loop body, making that address the current address. 
5494 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); 5495 CGF.EmitBlock(BodyBB); 5496 5497 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); 5498 5499 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( 5500 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); 5501 RHSElementPHI->addIncoming(RHSBegin, EntryBB); 5502 Address RHSElementCurrent = 5503 Address(RHSElementPHI, 5504 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5505 5506 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( 5507 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); 5508 LHSElementPHI->addIncoming(LHSBegin, EntryBB); 5509 Address LHSElementCurrent = 5510 Address(LHSElementPHI, 5511 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); 5512 5513 // Emit copy. 5514 CodeGenFunction::OMPPrivateScope Scope(CGF); 5515 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); 5516 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); 5517 Scope.Privatize(); 5518 RedOpGen(CGF, XExpr, EExpr, UpExpr); 5519 Scope.ForceCleanup(); 5520 5521 // Shift the address forward by one element. 5522 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( 5523 LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 5524 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( 5525 RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 5526 // Check whether we've reached the end. 5527 llvm::Value *Done = 5528 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); 5529 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); 5530 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); 5531 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); 5532 5533 // Done. 5534 CGF.EmitBlock(DoneBB, /*IsFinished=*/true); 5535 } 5536 5537 /// Emit reduction combiner. 
/// If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
/// \param CGF Function being emitted into.
/// \param ReductionOp Reduction operation expression to emit; its value is
/// ignored.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  // Recognize the shape "call whose callee is an OpaqueValueExpr wrapping a
  // reference to an OMPDeclareReductionDecl"; anything else falls through to
  // the plain emission at the bottom.
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          // Bind the opaque callee to the first function of the pair while the
          // call is emitted (Map is live until the end of this scope).
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

/// Emits the internal-linkage helper
/// \code
/// void reduction_func(void *LHSArg, void *RHSArg);
/// \endcode
/// that applies every operation in \p ReductionOps to the items addressed by
/// the two argument lists.
/// \param Loc Source location used for the generated function and its
/// parameters.
/// \param ArgsType LLVM type both arguments are cast to before being indexed.
/// \param Privates Private copies; only their types are inspected here (VLA /
/// array checks).
/// \param LHSExprs References to the LHS variables used by \p ReductionOps.
/// \param RHSExprs References to the RHS variables used by \p ReductionOps.
/// \param ReductionOps Reduction operations, one per item.
/// \return The generated function.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  // Idx is the slot in the argument lists. It runs ahead of I because every
  // variably modified item occupies an extra slot holding its size.
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The size travels in the next slot of the LHS list, encoded as a
      // pointer value; it is converted back to an integer below.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Second pass: emit the combiners now that LHS/RHS variables are remapped.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

/// Emits the combiner for a single reduction item: element-by-element when
/// \p PrivateRef has array type, a single combiner emission otherwise.
/// \param CGF Function being emitted into.
/// \param ReductionOp Reduction operation for the item.
/// \param PrivateRef Private copy; only its type is inspected.
/// \param LHS Reference to the LHS variable of \p ReductionOp.
/// \param RHS Reference to the RHS variable of \p ReductionOp.
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

/// Emits the code that combines private reduction copies into the original
/// items. With Options.SimpleReduction set, only the combiners are emitted;
/// otherwise the full __kmpc_reduce{_nowait} protocol sketched in the body
/// comment is generated. Does nothing when \p CGF has no insertion point.
/// \param CGF Function being emitted into.
/// \param Loc Source location for runtime calls and generated helpers.
/// \param Privates Private copies of the reduction items.
/// \param LHSExprs References to the LHS variables used by \p ReductionOps.
/// \param RHSExprs References to the RHS variables used by \p ReductionOps.
/// \param ReductionOps Reduction operations, one per item.
/// \param Options Supplies the WithNowait and SimpleReduction flags.
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  auto IPriv = Privates.begin();
  // Idx is the slot in RedList; it advances twice for variably modified items
  // (address slot followed by size slot).
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The element count is smuggled through the void* slot via inttoptr;
      // emitReductionFunction reads it back with ptrtoint.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
      LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  // Note that <n> is the number of reduction items (RHSExprs.size()), while
  // sizeof(RedList) also accounts for the extra VLA size slots.
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
                                       : OMPRTL__kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    auto IPriv = Privates.begin();
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  // Only an exit callee is supplied (enter callee is null): the end-reduce
  // runtime function is attached as the region's action.
  CommonActionTy Action(
      nullptr, llvm::None,
      createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
                                       : OMPRTL__kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    auto ILHS = LHSExprs.begin();
    auto IRHS = RHSExprs.begin();
    auto IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      // Decompose "x = <update>" so it can be tried as an atomic update:
      // XExpr is the assigned lvalue, UpExpr the whole right-hand side, EExpr
      // the right operand of the binary operator found in UpExpr (if any).
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // NOTE: the BinaryOperator pointer declared in the condition below is
      // also named BO and shadows the BinaryOperatorKind above inside this
      // if-statement only.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          // The callback recomputes UpExpr with VD remapped to a temporary
          // holding the value passed in as XRValue.
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                PrivateScope.addPrivate(
                    VD, [&CGF, VD, XRValue, Loc]() {
                      Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                      CGF.emitOMPSimpleStore(
                          CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                          VD->getType().getNonReferenceType(), Loc);
                      return LHSTemp;
                    });
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  // In the atomic case __kmpc_end_reduce is attached only for the non-nowait
  // form; with nowait no end call is emitted.
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          createRuntimeFunction(OMPRTL__kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "."
<Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" 5976 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, 5977 const Expr *Ref) { 5978 SmallString<256> Buffer; 5979 llvm::raw_svector_ostream Out(Buffer); 5980 const clang::DeclRefExpr *DE; 5981 const VarDecl *D = ::getBaseDecl(Ref, DE); 5982 if (!D) 5983 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); 5984 D = D->getCanonicalDecl(); 5985 std::string Name = CGM.getOpenMPRuntime().getName( 5986 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); 5987 Out << Prefix << Name << "_" 5988 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); 5989 return Out.str(); 5990 } 5991 5992 /// Emits reduction initializer function: 5993 /// \code 5994 /// void @.red_init(void* %arg) { 5995 /// %0 = bitcast void* %arg to <type>* 5996 /// store <type> <init>, <type>* %0 5997 /// ret void 5998 /// } 5999 /// \endcode 6000 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, 6001 SourceLocation Loc, 6002 ReductionCodeGen &RCG, unsigned N) { 6003 ASTContext &C = CGM.getContext(); 6004 FunctionArgList Args; 6005 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6006 ImplicitParamDecl::Other); 6007 Args.emplace_back(&Param); 6008 const auto &FnInfo = 6009 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6010 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6011 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); 6012 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6013 Name, &CGM.getModule()); 6014 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6015 Fn->setDoesNotRecurse(); 6016 CodeGenFunction CGF(CGM); 6017 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6018 Address PrivateAddr = CGF.EmitLoadOfPointer( 6019 CGF.GetAddrOfLocalVar(&Param), 6020 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6021 llvm::Value *Size = nullptr; 6022 
// If the size of the reduction item is non-constant, load it from global 6023 // threadprivate variable. 6024 if (RCG.getSizes(N).second) { 6025 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6026 CGF, CGM.getContext().getSizeType(), 6027 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6028 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6029 CGM.getContext().getSizeType(), Loc); 6030 } 6031 RCG.emitAggregateType(CGF, N, Size); 6032 LValue SharedLVal; 6033 // If initializer uses initializer from declare reduction construct, emit a 6034 // pointer to the address of the original reduction item (reuired by reduction 6035 // initializer) 6036 if (RCG.usesReductionInitializer(N)) { 6037 Address SharedAddr = 6038 CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6039 CGF, CGM.getContext().VoidPtrTy, 6040 generateUniqueName(CGM, "reduction", RCG.getRefExpr(N))); 6041 SharedAddr = CGF.EmitLoadOfPointer( 6042 SharedAddr, 6043 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); 6044 SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); 6045 } else { 6046 SharedLVal = CGF.MakeNaturalAlignAddrLValue( 6047 llvm::ConstantPointerNull::get(CGM.VoidPtrTy), 6048 CGM.getContext().VoidPtrTy); 6049 } 6050 // Emit the initializer: 6051 // %0 = bitcast void* %arg to <type>* 6052 // store <type> <init>, <type>* %0 6053 RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal, 6054 [](CodeGenFunction &) { return false; }); 6055 CGF.FinishFunction(); 6056 return Fn; 6057 } 6058 6059 /// Emits reduction combiner function: 6060 /// \code 6061 /// void @.red_comb(void* %arg0, void* %arg1) { 6062 /// %lhs = bitcast void* %arg0 to <type>* 6063 /// %rhs = bitcast void* %arg1 to <type>* 6064 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) 6065 /// store <type> %2, <type>* %lhs 6066 /// ret void 6067 /// } 6068 /// \endcode 6069 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, 6070 
SourceLocation Loc, 6071 ReductionCodeGen &RCG, unsigned N, 6072 const Expr *ReductionOp, 6073 const Expr *LHS, const Expr *RHS, 6074 const Expr *PrivateRef) { 6075 ASTContext &C = CGM.getContext(); 6076 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); 6077 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); 6078 FunctionArgList Args; 6079 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 6080 C.VoidPtrTy, ImplicitParamDecl::Other); 6081 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6082 ImplicitParamDecl::Other); 6083 Args.emplace_back(&ParamInOut); 6084 Args.emplace_back(&ParamIn); 6085 const auto &FnInfo = 6086 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6087 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 6088 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); 6089 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6090 Name, &CGM.getModule()); 6091 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6092 Fn->setDoesNotRecurse(); 6093 CodeGenFunction CGF(CGM); 6094 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6095 llvm::Value *Size = nullptr; 6096 // If the size of the reduction item is non-constant, load it from global 6097 // threadprivate variable. 6098 if (RCG.getSizes(N).second) { 6099 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6100 CGF, CGM.getContext().getSizeType(), 6101 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6102 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6103 CGM.getContext().getSizeType(), Loc); 6104 } 6105 RCG.emitAggregateType(CGF, N, Size); 6106 // Remap lhs and rhs variables to the addresses of the function arguments. 
6107 // %lhs = bitcast void* %arg0 to <type>* 6108 // %rhs = bitcast void* %arg1 to <type>* 6109 CodeGenFunction::OMPPrivateScope PrivateScope(CGF); 6110 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { 6111 // Pull out the pointer to the variable. 6112 Address PtrAddr = CGF.EmitLoadOfPointer( 6113 CGF.GetAddrOfLocalVar(&ParamInOut), 6114 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6115 return CGF.Builder.CreateElementBitCast( 6116 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); 6117 }); 6118 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { 6119 // Pull out the pointer to the variable. 6120 Address PtrAddr = CGF.EmitLoadOfPointer( 6121 CGF.GetAddrOfLocalVar(&ParamIn), 6122 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6123 return CGF.Builder.CreateElementBitCast( 6124 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); 6125 }); 6126 PrivateScope.Privatize(); 6127 // Emit the combiner body: 6128 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) 6129 // store <type> %2, <type>* %lhs 6130 CGM.getOpenMPRuntime().emitSingleReductionCombiner( 6131 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), 6132 cast<DeclRefExpr>(RHS)); 6133 CGF.FinishFunction(); 6134 return Fn; 6135 } 6136 6137 /// Emits reduction finalizer function: 6138 /// \code 6139 /// void @.red_fini(void* %arg) { 6140 /// %0 = bitcast void* %arg to <type>* 6141 /// <destroy>(<type>* %0) 6142 /// ret void 6143 /// } 6144 /// \endcode 6145 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, 6146 SourceLocation Loc, 6147 ReductionCodeGen &RCG, unsigned N) { 6148 if (!RCG.needCleanups(N)) 6149 return nullptr; 6150 ASTContext &C = CGM.getContext(); 6151 FunctionArgList Args; 6152 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 6153 ImplicitParamDecl::Other); 6154 Args.emplace_back(&Param); 6155 const auto &FnInfo = 6156 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 6157 llvm::FunctionType *FnTy = 
CGM.getTypes().GetFunctionType(FnInfo); 6158 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); 6159 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 6160 Name, &CGM.getModule()); 6161 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 6162 Fn->setDoesNotRecurse(); 6163 CodeGenFunction CGF(CGM); 6164 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 6165 Address PrivateAddr = CGF.EmitLoadOfPointer( 6166 CGF.GetAddrOfLocalVar(&Param), 6167 C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); 6168 llvm::Value *Size = nullptr; 6169 // If the size of the reduction item is non-constant, load it from global 6170 // threadprivate variable. 6171 if (RCG.getSizes(N).second) { 6172 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( 6173 CGF, CGM.getContext().getSizeType(), 6174 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); 6175 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, 6176 CGM.getContext().getSizeType(), Loc); 6177 } 6178 RCG.emitAggregateType(CGF, N, Size); 6179 // Emit the finalizer body: 6180 // <destroy>(<type>* %0) 6181 RCG.emitCleanups(CGF, N, PrivateAddr); 6182 CGF.FinishFunction(); 6183 return Fn; 6184 } 6185 6186 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( 6187 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 6188 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 6189 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) 6190 return nullptr; 6191 6192 // Build typedef struct: 6193 // kmp_task_red_input { 6194 // void *reduce_shar; // shared reduction item 6195 // size_t reduce_size; // size of data item 6196 // void *reduce_init; // data initialization routine 6197 // void *reduce_fini; // data finalization routine 6198 // void *reduce_comb; // data combiner routine 6199 // kmp_task_red_flags_t flags; // flags for additional info from compiler 6200 // } kmp_task_red_input_t; 6201 
ASTContext &C = CGM.getContext(); 6202 RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t"); 6203 RD->startDefinition(); 6204 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6205 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); 6206 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6207 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6208 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); 6209 const FieldDecl *FlagsFD = addFieldToRecordDecl( 6210 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); 6211 RD->completeDefinition(); 6212 QualType RDType = C.getRecordType(RD); 6213 unsigned Size = Data.ReductionVars.size(); 6214 llvm::APInt ArraySize(/*numBits=*/64, Size); 6215 QualType ArrayRDType = C.getConstantArrayType( 6216 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); 6217 // kmp_task_red_input_t .rd_input.[Size]; 6218 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); 6219 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies, 6220 Data.ReductionOps); 6221 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { 6222 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt]; 6223 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), 6224 llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; 6225 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( 6226 TaskRedInput.getPointer(), Idxs, 6227 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, 6228 ".rd_input.gep."); 6229 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); 6230 // ElemLVal.reduce_shar = &Shareds[Cnt]; 6231 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); 6232 RCG.emitSharedLValue(CGF, Cnt); 6233 llvm::Value *CastedShared = 6234 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer()); 6235 CGF.EmitStoreOfScalar(CastedShared, SharedLVal); 6236 RCG.emitAggregateType(CGF, Cnt); 6237 llvm::Value *SizeValInChars; 6238 
llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs, array sections and
    // custom reduction initializations. It is required because runtime does not
    // provide the way to pass the sizes of VLAs/array sections to
    // initializer/combiner/finalizer functions and does not pass the pointer to
    // original reduction item to the initializer. Instead threadprivate global
    // variables are used to store these values and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
    // ElemLVal.reduce_fini = fini;
    // A null pointer is stored when no finalizer function was generated.
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = DelayedCreation ? 1 : 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
  // *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(
      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
}

/// Stores into artificial threadprivate variables the per-item values that
/// the generated reduction helper functions read from there instead of
/// receiving them as arguments:
///  - the size of reduction item \p N, when RCG.getSizes(N) yields a non-null
///    size value (variable "reduction_size");
///  - the address of the original shared item, when the item uses a custom
///    reduction initializer (variable "reduction").
/// \p Loc is not used by this function.
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size of the item is not a
  // constant, i.e. a size value was produced (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
  // Store address of the original reduction item if custom initializer is used.
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().VoidPtrTy,
        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
        SharedAddr, /*IsVolatile=*/false);
  }
}

/// Emits a call to __kmpc_task_reduction_get_th_data with the current thread
/// id, \p ReductionsPtr and the address of \p SharedLVal cast to void*.
/// \return the pointer returned by the runtime call, wrapped in an Address
/// that reuses the alignment of \p SharedLVal.
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      ReductionsPtr,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
                                                      CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
      SharedLVal.getAlignment());
}

/// Emits a call to __kmpc_omp_taskwait(loc, global_tid). Nothing is emitted
/// when \p CGF has no insert point.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  // Ignore return result until untied tasks are supported.
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
  // If we are inside an OpenMP region, let it emit its untied switch.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

/// Emits the code produced by \p CodeGen in place, i.e. without outlining it
/// into a separate function. Nothing is emitted when \p CGF has no insert
/// point.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // NOTE(review): InlinedOpenMPRegionRAII is defined outside this chunk;
  // presumably it installs the inlined region info as CGF.CapturedStmtInfo for
  // the lifetime of Region, which is what EmitBody below runs on - confirm.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values passed as the cncl_kind argument of __kmpc_cancel and
/// __kmpc_cancellationpoint.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

/// Maps the kind of the region being cancelled to the matching RTCancelKind.
/// Only OMPD_parallel, OMPD_for, OMPD_sections and OMPD_taskgroup are
/// expected; any other kind trips the assertion and, when assertions are
/// compiled out, is treated as a taskgroup.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

/// Emits a call to __kmpc_cancellationpoint followed by a conditional branch
/// to the cancellation exit of the enclosing construct. Code is emitted only
/// when \p CGF has an insert point, the current captured-statement info is an
/// OpenMP region, and either \p CancelRegion is OMPD_taskgroup or that region
/// has a cancel.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested below: a non-zero value branches to the
      // cancel exit.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
      // if (__kmpc_cancellationpoint()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

/// Emits a call to __kmpc_cancel followed by a conditional branch to the
/// cancellation exit of the enclosing construct. When \p IfCond is non-null
/// the call is emitted through emitOMPIfClause with an empty 'else' generator.
/// Nothing is emitted when \p CGF has no insert point or the current
/// captured-statement info is not an OpenMP region.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // The generator uses the CodeGenFunction it is invoked with (its own CGF
    // parameter) and reaches the runtime through CGF.CGM.
    auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
                                                        PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // The call result is tested right after the call: a non-zero value
      // branches to the cancel exit.
llvm::Value *Result = CGF.EmitRuntimeCall(
          RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Conditional cancel: the 'else' generator intentionally emits nothing.
      emitOMPIfClause(CGF, IfCond, ThenGen,
                      [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

/// Records that a target region has been emitted (HasEmittedTargetRegion) and
/// forwards all arguments to emitTargetOutlinedFunctionHelper.
/// \p ParentName must not be empty.
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target region parent name!");
  HasEmittedTargetRegion = true;
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

/// Generates the outlined function for the OMPD_target captured statement of
/// \p D and stores it in \p OutlinedFn. If \p IsOffloadEntry is true, also
/// creates the region ID in \p OutlinedFnID and registers the entry with
/// OffloadEntriesInfoManager; otherwise \p OutlinedFnID is left untouched.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    // The device and file IDs are printed in hexadecimal, the line number in
    // decimal.
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  // The outlined function is generated with a fresh CodeGenFunction whose
  // captured-statement info carries the entry name built above.
  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also give
  // the outlined function weak (WeakAnyLinkage), non-DSO-local linkage in case
  // we are emitting code for the device, because these functions will be entry
  // points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
    OutlinedFn->setDSOLocal(false);
  } else {
    // Host: the ID is a separate constant i8 global named after the entry.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls and, in either case, has no (possible) side effects.
static bool isTrivial(ASTContext &Ctx, const Expr * E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

/// Looks through containers and nested compound statements of \p Body for the
/// one statement that cannot be ignored. Ignored are trivial expressions (see
/// isTrivial), asm and null statements, flush/barrier/taskyield directives,
/// and declaration statements all of whose declarations are either of the
/// listed non-emitting kinds or variables that are constexpr or of
/// trivial/reference type with no or a trivial initializer.
/// \return that statement, or nullptr if there is none or more than one.
/// \p Body is dereferenced unconditionally and must not be null.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the single remaining child is itself a compound
  // statement.
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->isConstexpr() ||
                     ((VD->getType().isTrivialType(Ctx) ||
                       VD->getType()->isReferenceType()) &&
                      (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

/// Emit the number of teams for a target directive.  Inspect the num_teams
/// clause associated with a teams construct combined or closely nested
/// with the target directive.
///
/// Emit a team of size one for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
                               const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  switch (DirectiveKind) {
  case OMPD_target: {
    // Plain 'target': the answer depends on the single executable directive
    // nested in the region, if there is one.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          // Evaluate the nested num_teams expression with the inner-expression
          // region info installed, then convert it to a signed i32.
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          llvm::Value *NumTeamsVal =
              CGF.EmitScalarExpr(NumTeams,
                                 /*IgnoreResultAssign*/ true);
          return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                                   /*isSigned=*/true);
        }
        // Nested teams directive without a num_teams clause.
        return Bld.getInt32(0);
      }
      // A nested parallel or simd directive runs in a single team.
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
        return Bld.getInt32(1);
      return Bld.getInt32(0);
    }
    // No single nested executable directive was found.
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target teams directives: use the directive's own num_teams
    // clause if present, otherwise emit 0.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      llvm::Value *NumTeamsVal =
          CGF.EmitScalarExpr(NumTeams,
                             /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    return Bld.getInt32(0);
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    // No teams construct involved: a single team.
    return Bld.getInt32(1);
  // All remaining kinds are listed explicitly and fall through to the
  // llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_parallel_master_taskloop:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

/// Computes the number of threads implied by the single executable directive
/// nested in \p CS (see CGOpenMPRuntime::getSingleCompoundChild).
///
/// - Nested parallel directive: the num_threads value, limited to
///   \p DefaultThreadLimitVal when that is non-null; without a num_threads
///   clause, \p DefaultThreadLimitVal or 0. An applicable 'if' clause that
///   folds to false yields 1; a non-constant one selects between the value
///   above and 1.
/// - Nested simd directive: 1.
/// - Any other nested directive: \p DefaultThreadLimitVal unchanged, which
///   may be null.
/// - No single nested directive: \p DefaultThreadLimitVal or 0.
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        // Pick the first 'if' clause with no name modifier or with the
        // 'parallel' modifier.
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Constant condition: false means a single thread; true falls
            // through to the num_threads handling with CondVal left null.
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            // Non-constant condition: emit the clause's pre-init declarations,
            // then the condition itself.
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        // NumThreads = min(DefaultThreadLimitVal, NumThreads), compared as
        // unsigned values.
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    // Some other nested directive: hand the limit back unchanged (possibly
    // null).
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}

/// Emit the number of threads for a target directive.  Inspect the
/// thread_limit clause associated with a teams construct combined or closely
/// nested with the target directive.
///
/// Emit the num_threads clause for directives such as 'target parallel' that
/// have no associated teams construct.
///
/// Otherwise, return nullptr.
static llvm::Value *
emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    // ThreadLimitVal is still null here, so getNumThreads returns null only
    // when the region's single child is a directive that is neither parallel
    // nor simd; that case is handled below.
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        // Evaluate the nested thread_limit expression, emitting its pre-init
        // declarations first.
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      // For a teams directive that is not combined with distribute, step into
      // its region; Dir may become null if that region has no single nested
      // directive.
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      // For a non-simd distribute directive, look for a parallel directive
      // nested in its innermost region.
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    // The directive's own thread_limit (if any) is the default limit for the
    // nested lookups below.
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Only a plain nested 'distribute' is looked through here.
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    // The result of getNumThreads is returned as is, so it is null when
    // ThreadLimitVal is null and the single nested directive is neither
    // parallel nor simd.
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the first 'if' clause with no name modifier or with the
      // 'parallel' modifier.
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // With both clauses present the result is the unsigned minimum of
      // num_threads and thread_limit; otherwise it is num_threads.
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  // All remaining kinds are listed explicitly and fall through to the
  // llvm_unreachable below.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_parallel_master_taskloop:
  case OMPD_requires:
  case OMPD_unknown:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped.
Used when we have the 7057 /// use_device_ptr clause. 7058 OMP_MAP_RETURN_PARAM = 0x40, 7059 /// This flag signals that the reference being passed is a pointer to 7060 /// private data. 7061 OMP_MAP_PRIVATE = 0x80, 7062 /// Pass the element to the device by value. 7063 OMP_MAP_LITERAL = 0x100, 7064 /// Implicit map 7065 OMP_MAP_IMPLICIT = 0x200, 7066 /// Close is a hint to the runtime to allocate memory close to 7067 /// the target device. 7068 OMP_MAP_CLOSE = 0x400, 7069 /// The 16 MSBs of the flags indicate whether the entry is member of some 7070 /// struct/class. 7071 OMP_MAP_MEMBER_OF = 0xffff000000000000, 7072 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), 7073 }; 7074 7075 /// Get the offset of the OMP_MAP_MEMBER_OF field. 7076 static unsigned getFlagMemberOffset() { 7077 unsigned Offset = 0; 7078 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); 7079 Remain = Remain >> 1) 7080 Offset++; 7081 return Offset; 7082 } 7083 7084 /// Class that associates information with a base pointer to be passed to the 7085 /// runtime library. 7086 class BasePointerInfo { 7087 /// The base pointer. 7088 llvm::Value *Ptr = nullptr; 7089 /// The base declaration that refers to this device pointer, or null if 7090 /// there is none. 7091 const ValueDecl *DevPtrDecl = nullptr; 7092 7093 public: 7094 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) 7095 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} 7096 llvm::Value *operator*() const { return Ptr; } 7097 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } 7098 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } 7099 }; 7100 7101 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; 7102 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; 7103 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; 7104 7105 /// Map between a struct and the its lowest & highest elements which have been 7106 /// mapped. 
/// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  /// Both element entries start out as field index 0 with an invalid address,
  /// and Base starts out invalid.
  struct StructRangeInfoTy {
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
  };

private:
  /// Information recorded for one mappable-expression component list: the
  /// components themselves, the map type and map-type modifiers, whether the
  /// device pointer has to be returned, and whether the map is implicit.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Held as an ArrayRef, i.e. this struct does not own the modifier storage.
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        bool ReturnDevicePointer, bool IsImplicit)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
  };

  /// If use_device_ptr is used on a pointer which is a struct member and there
  /// is no map information about it, then emission of that entry is deferred
  /// until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
        : IE(IE), VD(VD) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
7154 CodeGenFunction &CGF; 7155 7156 /// Set of all first private variables in the current directive. 7157 /// bool data is set to true if the variable is implicitly marked as 7158 /// firstprivate, false otherwise. 7159 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls; 7160 7161 /// Map between device pointer declarations and their expression components. 7162 /// The key value for declarations in 'this' is null. 7163 llvm::DenseMap< 7164 const ValueDecl *, 7165 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> 7166 DevPointersMap; 7167 7168 llvm::Value *getExprTypeSize(const Expr *E) const { 7169 QualType ExprTy = E->getType().getCanonicalType(); 7170 7171 // Reference types are ignored for mapping purposes. 7172 if (const auto *RefTy = ExprTy->getAs<ReferenceType>()) 7173 ExprTy = RefTy->getPointeeType().getCanonicalType(); 7174 7175 // Given that an array section is considered a built-in type, we need to 7176 // do the calculation based on the length of the section instead of relying 7177 // on CGF.getTypeSize(E->getType()). 7178 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { 7179 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( 7180 OAE->getBase()->IgnoreParenImpCasts()) 7181 .getCanonicalType(); 7182 7183 // If there is no length associated with the expression and lower bound is 7184 // not specified too, that means we are using the whole length of the 7185 // base. 
7186 if (!OAE->getLength() && OAE->getColonLoc().isValid() && 7187 !OAE->getLowerBound()) 7188 return CGF.getTypeSize(BaseTy); 7189 7190 llvm::Value *ElemSize; 7191 if (const auto *PTy = BaseTy->getAs<PointerType>()) { 7192 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType()); 7193 } else { 7194 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr()); 7195 assert(ATy && "Expecting array type if not a pointer type."); 7196 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType()); 7197 } 7198 7199 // If we don't have a length at this point, that is because we have an 7200 // array section with a single element. 7201 if (!OAE->getLength() && OAE->getColonLoc().isInvalid()) 7202 return ElemSize; 7203 7204 if (const Expr *LenExpr = OAE->getLength()) { 7205 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr); 7206 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(), 7207 CGF.getContext().getSizeType(), 7208 LenExpr->getExprLoc()); 7209 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize); 7210 } 7211 assert(!OAE->getLength() && OAE->getColonLoc().isValid() && 7212 OAE->getLowerBound() && "expected array_section[lb:]."); 7213 // Size = sizetype - lb * elemtype; 7214 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy); 7215 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound()); 7216 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(), 7217 CGF.getContext().getSizeType(), 7218 OAE->getLowerBound()->getExprLoc()); 7219 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize); 7220 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal); 7221 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal); 7222 LengthVal = CGF.Builder.CreateSelect( 7223 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0)); 7224 return LengthVal; 7225 } 7226 return CGF.getTypeSize(ExprTy); 7227 } 7228 7229 /// Return the corresponding bits for a given map clause modifier. 
Add 7230 /// a flag marking the map as a pointer if requested. Add a flag marking the 7231 /// map as the first one of a series of maps that relate to the same map 7232 /// expression. 7233 OpenMPOffloadMappingFlags getMapTypeBits( 7234 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, 7235 bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const { 7236 OpenMPOffloadMappingFlags Bits = 7237 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; 7238 switch (MapType) { 7239 case OMPC_MAP_alloc: 7240 case OMPC_MAP_release: 7241 // alloc and release is the default behavior in the runtime library, i.e. 7242 // if we don't pass any bits alloc/release that is what the runtime is 7243 // going to do. Therefore, we don't need to signal anything for these two 7244 // type modifiers. 7245 break; 7246 case OMPC_MAP_to: 7247 Bits |= OMP_MAP_TO; 7248 break; 7249 case OMPC_MAP_from: 7250 Bits |= OMP_MAP_FROM; 7251 break; 7252 case OMPC_MAP_tofrom: 7253 Bits |= OMP_MAP_TO | OMP_MAP_FROM; 7254 break; 7255 case OMPC_MAP_delete: 7256 Bits |= OMP_MAP_DELETE; 7257 break; 7258 case OMPC_MAP_unknown: 7259 llvm_unreachable("Unexpected map type!"); 7260 } 7261 if (AddPtrFlag) 7262 Bits |= OMP_MAP_PTR_AND_OBJ; 7263 if (AddIsTargetParamFlag) 7264 Bits |= OMP_MAP_TARGET_PARAM; 7265 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) 7266 != MapModifiers.end()) 7267 Bits |= OMP_MAP_ALWAYS; 7268 if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) 7269 != MapModifiers.end()) 7270 Bits |= OMP_MAP_CLOSE; 7271 return Bits; 7272 } 7273 7274 /// Return true if the provided expression is a final array section. A 7275 /// final array section, is one whose length can't be proved to be one. 7276 bool isFinalArraySectionExpression(const Expr *E) const { 7277 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); 7278 7279 // It is not an array section and therefore not a unity-size one. 
7280 if (!OASE) 7281 return false; 7282 7283 // An array section with no colon always refer to a single element. 7284 if (OASE->getColonLoc().isInvalid()) 7285 return false; 7286 7287 const Expr *Length = OASE->getLength(); 7288 7289 // If we don't have a length we have to check if the array has size 1 7290 // for this dimension. Also, we should always expect a length if the 7291 // base type is pointer. 7292 if (!Length) { 7293 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( 7294 OASE->getBase()->IgnoreParenImpCasts()) 7295 .getCanonicalType(); 7296 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) 7297 return ATy->getSize().getSExtValue() != 1; 7298 // If we don't have a constant dimension length, we have to consider 7299 // the current section as having any size, so it is not necessarily 7300 // unitary. If it happen to be unity size, that's user fault. 7301 return true; 7302 } 7303 7304 // Check if the length evaluates to 1. 7305 Expr::EvalResult Result; 7306 if (!Length->EvaluateAsInt(Result, CGF.getContext())) 7307 return true; // Can have more that size 1. 7308 7309 llvm::APSInt ConstLength = Result.Val.getInt(); 7310 return ConstLength.getSExtValue() != 1; 7311 } 7312 7313 /// Generate the base pointers, section pointers, sizes and map type 7314 /// bits for the provided map type, map modifier, and expression components. 7315 /// \a IsFirstComponent should be set to true if the provided set of 7316 /// components is the first associated with a capture. 
7317 void generateInfoForComponentList( 7318 OpenMPMapClauseKind MapType, 7319 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7320 OMPClauseMappableExprCommon::MappableExprComponentListRef Components, 7321 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 7322 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 7323 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, 7324 bool IsImplicit, 7325 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 7326 OverlappedElements = llvm::None) const { 7327 // The following summarizes what has to be generated for each map and the 7328 // types below. The generated information is expressed in this order: 7329 // base pointer, section pointer, size, flags 7330 // (to add to the ones that come from the map type and modifier). 7331 // 7332 // double d; 7333 // int i[100]; 7334 // float *p; 7335 // 7336 // struct S1 { 7337 // int i; 7338 // float f[50]; 7339 // } 7340 // struct S2 { 7341 // int i; 7342 // float f[50]; 7343 // S1 s; 7344 // double *p; 7345 // struct S2 *ps; 7346 // } 7347 // S2 s; 7348 // S2 *ps; 7349 // 7350 // map(d) 7351 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM 7352 // 7353 // map(i) 7354 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM 7355 // 7356 // map(i[1:23]) 7357 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM 7358 // 7359 // map(p) 7360 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM 7361 // 7362 // map(p[1:24]) 7363 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM 7364 // 7365 // map(s) 7366 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM 7367 // 7368 // map(s.i) 7369 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM 7370 // 7371 // map(s.s.f) 7372 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7373 // 7374 // map(s.p) 7375 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM 7376 // 7377 // map(to: s.p[:22]) 7378 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*) 7379 // &s, &(s.p), sizeof(double*), 
MEMBER_OF(1) (**) 7380 // &(s.p), &(s.p[0]), 22*sizeof(double), 7381 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***) 7382 // (*) alloc space for struct members, only this is a target parameter 7383 // (**) map the pointer (nothing to be mapped in this example) (the compiler 7384 // optimizes this entry out, same in the examples below) 7385 // (***) map the pointee (map: to) 7386 // 7387 // map(s.ps) 7388 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7389 // 7390 // map(from: s.ps->s.i) 7391 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7392 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7393 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7394 // 7395 // map(to: s.ps->ps) 7396 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7397 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7398 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO 7399 // 7400 // map(s.ps->ps->ps) 7401 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7402 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7403 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7404 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7405 // 7406 // map(to: s.ps->ps->s.f[:22]) 7407 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM 7408 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1) 7409 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7410 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7411 // 7412 // map(ps) 7413 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM 7414 // 7415 // map(ps->i) 7416 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM 7417 // 7418 // map(ps->s.f) 7419 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM 7420 // 7421 // map(from: ps->p) 7422 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM 7423 // 7424 // map(to: ps->p[:22]) 7425 // ps, &(ps->p), sizeof(double*), TARGET_PARAM 7426 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1) 7427 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ 
| TO 7428 // 7429 // map(ps->ps) 7430 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM 7431 // 7432 // map(from: ps->ps->s.i) 7433 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7434 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7435 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7436 // 7437 // map(from: ps->ps->ps) 7438 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7439 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7440 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7441 // 7442 // map(ps->ps->ps->ps) 7443 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7444 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7445 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7446 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM 7447 // 7448 // map(to: ps->ps->ps->s.f[:22]) 7449 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM 7450 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1) 7451 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ 7452 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO 7453 // 7454 // map(to: s.f[:22]) map(from: s.p[:33]) 7455 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) + 7456 // sizeof(double*) (**), TARGET_PARAM 7457 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO 7458 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) 7459 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM 7460 // (*) allocate contiguous space needed to fit all mapped members even if 7461 // we allocate space for members not mapped (in this example, 7462 // s.f[22..49] and s.s are not mapped, yet we must allocate space for 7463 // them as well because they fall between &s.f[0] and &s.p) 7464 // 7465 // map(from: s.f[:22]) map(to: ps->p[:33]) 7466 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM 7467 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7468 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*) 7469 // &(ps->p), 
&(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO 7470 // (*) the struct this entry pertains to is the 2nd element in the list of 7471 // arguments, hence MEMBER_OF(2) 7472 // 7473 // map(from: s.f[:22], s.s) map(to: ps->p[:33]) 7474 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM 7475 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM 7476 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM 7477 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM 7478 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*) 7479 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO 7480 // (*) the struct this entry pertains to is the 4th element in the list 7481 // of arguments, hence MEMBER_OF(4) 7482 7483 // Track if the map information being generated is the first for a capture. 7484 bool IsCaptureFirstInfo = IsFirstComponentList; 7485 // When the variable is on a declare target link or in a to clause with 7486 // unified memory, a reference is needed to hold the host/device address 7487 // of the variable. 7488 bool RequiresReference = false; 7489 7490 // Scan the components from the base to the complete expression. 7491 auto CI = Components.rbegin(); 7492 auto CE = Components.rend(); 7493 auto I = CI; 7494 7495 // Track if the map information being generated is the first for a list of 7496 // components. 7497 bool IsExpressionFirstInfo = true; 7498 Address BP = Address::invalid(); 7499 const Expr *AssocExpr = I->getAssociatedExpression(); 7500 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); 7501 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); 7502 7503 if (isa<MemberExpr>(AssocExpr)) { 7504 // The base is the 'this' pointer. The content of the pointer is going 7505 // to be the base of the field being mapped. 
7506 BP = CGF.LoadCXXThisAddress(); 7507 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || 7508 (OASE && 7509 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { 7510 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7511 } else { 7512 // The base is the reference to the variable. 7513 // BP = &Var. 7514 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); 7515 if (const auto *VD = 7516 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { 7517 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 7518 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { 7519 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 7520 (*Res == OMPDeclareTargetDeclAttr::MT_To && 7521 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { 7522 RequiresReference = true; 7523 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 7524 } 7525 } 7526 } 7527 7528 // If the variable is a pointer and is being dereferenced (i.e. is not 7529 // the last component), the base has to be the pointer itself, not its 7530 // reference. References are ignored for mapping purposes. 7531 QualType Ty = 7532 I->getAssociatedDeclaration()->getType().getNonReferenceType(); 7533 if (Ty->isAnyPointerType() && std::next(I) != CE) { 7534 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); 7535 7536 // We do not need to generate individual map information for the 7537 // pointer, it can be associated with the combined storage. 7538 ++I; 7539 } 7540 } 7541 7542 // Track whether a component of the list should be marked as MEMBER_OF some 7543 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry 7544 // in a component list should be marked as MEMBER_OF, all subsequent entries 7545 // do not belong to the base struct. E.g. 
7546 // struct S2 s; 7547 // s.ps->ps->ps->f[:] 7548 // (1) (2) (3) (4) 7549 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a 7550 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3) 7551 // is the pointee of ps(2) which is not member of struct s, so it should not 7552 // be marked as such (it is still PTR_AND_OBJ). 7553 // The variable is initialized to false so that PTR_AND_OBJ entries which 7554 // are not struct members are not considered (e.g. array of pointers to 7555 // data). 7556 bool ShouldBeMemberOf = false; 7557 7558 // Variable keeping track of whether or not we have encountered a component 7559 // in the component list which is a member expression. Useful when we have a 7560 // pointer or a final array section, in which case it is the previous 7561 // component in the list which tells us whether we have a member expression. 7562 // E.g. X.f[:] 7563 // While processing the final array section "[:]" it is "f" which tells us 7564 // whether we are dealing with a member of a declared struct. 7565 const MemberExpr *EncounteredME = nullptr; 7566 7567 for (; I != CE; ++I) { 7568 // If the current component is member of a struct (parent struct) mark it. 7569 if (!EncounteredME) { 7570 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression()); 7571 // If we encounter a PTR_AND_OBJ entry from now on it should be marked 7572 // as MEMBER_OF the parent struct. 7573 if (EncounteredME) 7574 ShouldBeMemberOf = true; 7575 } 7576 7577 auto Next = std::next(I); 7578 7579 // We need to generate the addresses and sizes if this is the last 7580 // component, if the component is a pointer or if it is an array section 7581 // whose length can't be proved to be one. If this is a pointer, it 7582 // becomes the base address for the following components. 7583 7584 // A final array section, is one whose length can't be proved to be one. 
7585 bool IsFinalArraySection = 7586 isFinalArraySectionExpression(I->getAssociatedExpression()); 7587 7588 // Get information on whether the element is a pointer. Have to do a 7589 // special treatment for array sections given that they are built-in 7590 // types. 7591 const auto *OASE = 7592 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); 7593 bool IsPointer = 7594 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) 7595 .getCanonicalType() 7596 ->isAnyPointerType()) || 7597 I->getAssociatedExpression()->getType()->isAnyPointerType(); 7598 7599 if (Next == CE || IsPointer || IsFinalArraySection) { 7600 // If this is not the last component, we expect the pointer to be 7601 // associated with an array expression or member expression. 7602 assert((Next == CE || 7603 isa<MemberExpr>(Next->getAssociatedExpression()) || 7604 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || 7605 isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) && 7606 "Unexpected expression"); 7607 7608 Address LB = 7609 CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress(); 7610 7611 // If this component is a pointer inside the base struct then we don't 7612 // need to create any entry for it - it will be combined with the object 7613 // it is pointing to into a single PTR_AND_OBJ entry. 7614 bool IsMemberPointer = 7615 IsPointer && EncounteredME && 7616 (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == 7617 EncounteredME); 7618 if (!OverlappedElements.empty()) { 7619 // Handle base element with the info for overlapped elements. 7620 assert(!PartialStruct.Base.isValid() && "The base element is set."); 7621 assert(Next == CE && 7622 "Expected last element for the overlapped elements."); 7623 assert(!IsPointer && 7624 "Unexpected base element with the pointer type."); 7625 // Mark the whole struct as the struct that requires allocation on the 7626 // device. 
7627 PartialStruct.LowestElem = {0, LB}; 7628 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( 7629 I->getAssociatedExpression()->getType()); 7630 Address HB = CGF.Builder.CreateConstGEP( 7631 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, 7632 CGF.VoidPtrTy), 7633 TypeSize.getQuantity() - 1); 7634 PartialStruct.HighestElem = { 7635 std::numeric_limits<decltype( 7636 PartialStruct.HighestElem.first)>::max(), 7637 HB}; 7638 PartialStruct.Base = BP; 7639 // Emit data for non-overlapped data. 7640 OpenMPOffloadMappingFlags Flags = 7641 OMP_MAP_MEMBER_OF | 7642 getMapTypeBits(MapType, MapModifiers, IsImplicit, 7643 /*AddPtrFlag=*/false, 7644 /*AddIsTargetParamFlag=*/false); 7645 LB = BP; 7646 llvm::Value *Size = nullptr; 7647 // Do bitcopy of all non-overlapped structure elements. 7648 for (OMPClauseMappableExprCommon::MappableExprComponentListRef 7649 Component : OverlappedElements) { 7650 Address ComponentLB = Address::invalid(); 7651 for (const OMPClauseMappableExprCommon::MappableComponent &MC : 7652 Component) { 7653 if (MC.getAssociatedDeclaration()) { 7654 ComponentLB = 7655 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) 7656 .getAddress(); 7657 Size = CGF.Builder.CreatePtrDiff( 7658 CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), 7659 CGF.EmitCastToVoidPtr(LB.getPointer())); 7660 break; 7661 } 7662 } 7663 BasePointers.push_back(BP.getPointer()); 7664 Pointers.push_back(LB.getPointer()); 7665 Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, 7666 /*isSigned=*/true)); 7667 Types.push_back(Flags); 7668 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); 7669 } 7670 BasePointers.push_back(BP.getPointer()); 7671 Pointers.push_back(LB.getPointer()); 7672 Size = CGF.Builder.CreatePtrDiff( 7673 CGF.EmitCastToVoidPtr( 7674 CGF.Builder.CreateConstGEP(HB, 1).getPointer()), 7675 CGF.EmitCastToVoidPtr(LB.getPointer())); 7676 Sizes.push_back( 7677 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7678 Types.push_back(Flags); 7679 
break; 7680 } 7681 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); 7682 if (!IsMemberPointer) { 7683 BasePointers.push_back(BP.getPointer()); 7684 Pointers.push_back(LB.getPointer()); 7685 Sizes.push_back( 7686 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); 7687 7688 // We need to add a pointer flag for each map that comes from the 7689 // same expression except for the first one. We also need to signal 7690 // this map is the first one that relates with the current capture 7691 // (there is a set of entries for each capture). 7692 OpenMPOffloadMappingFlags Flags = getMapTypeBits( 7693 MapType, MapModifiers, IsImplicit, 7694 !IsExpressionFirstInfo || RequiresReference, 7695 IsCaptureFirstInfo && !RequiresReference); 7696 7697 if (!IsExpressionFirstInfo) { 7698 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, 7699 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. 7700 if (IsPointer) 7701 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | 7702 OMP_MAP_DELETE | OMP_MAP_CLOSE); 7703 7704 if (ShouldBeMemberOf) { 7705 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag 7706 // should be later updated with the correct value of MEMBER_OF. 7707 Flags |= OMP_MAP_MEMBER_OF; 7708 // From now on, all subsequent PTR_AND_OBJ entries should not be 7709 // marked as MEMBER_OF. 7710 ShouldBeMemberOf = false; 7711 } 7712 } 7713 7714 Types.push_back(Flags); 7715 } 7716 7717 // If we have encountered a member expression so far, keep track of the 7718 // mapped member. If the parent is "*this", then the value declaration 7719 // is nullptr. 
7720 if (EncounteredME) { 7721 const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl()); 7722 unsigned FieldIndex = FD->getFieldIndex(); 7723 7724 // Update info about the lowest and highest elements for this struct 7725 if (!PartialStruct.Base.isValid()) { 7726 PartialStruct.LowestElem = {FieldIndex, LB}; 7727 PartialStruct.HighestElem = {FieldIndex, LB}; 7728 PartialStruct.Base = BP; 7729 } else if (FieldIndex < PartialStruct.LowestElem.first) { 7730 PartialStruct.LowestElem = {FieldIndex, LB}; 7731 } else if (FieldIndex > PartialStruct.HighestElem.first) { 7732 PartialStruct.HighestElem = {FieldIndex, LB}; 7733 } 7734 } 7735 7736 // If we have a final array section, we are done with this expression. 7737 if (IsFinalArraySection) 7738 break; 7739 7740 // The pointer becomes the base for the next element. 7741 if (Next != CE) 7742 BP = LB; 7743 7744 IsExpressionFirstInfo = false; 7745 IsCaptureFirstInfo = false; 7746 } 7747 } 7748 } 7749 7750 /// Return the adjusted map modifiers if the declaration a capture refers to 7751 /// appears in a first-private clause. This is expected to be used only with 7752 /// directives that start with 'target'. 7753 MappableExprsHandler::OpenMPOffloadMappingFlags 7754 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { 7755 assert(Cap.capturesVariable() && "Expected capture by reference only!"); 7756 7757 // A first private variable captured by reference will use only the 7758 // 'private ptr' and 'map to' flag. Return the right flags if the captured 7759 // declaration is known as first-private in this handler. 
7760 if (FirstPrivateDecls.count(Cap.getCapturedVar())) { 7761 if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && 7762 Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) 7763 return MappableExprsHandler::OMP_MAP_ALWAYS | 7764 MappableExprsHandler::OMP_MAP_TO; 7765 if (Cap.getCapturedVar()->getType()->isAnyPointerType()) 7766 return MappableExprsHandler::OMP_MAP_TO | 7767 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; 7768 return MappableExprsHandler::OMP_MAP_PRIVATE | 7769 MappableExprsHandler::OMP_MAP_TO; 7770 } 7771 return MappableExprsHandler::OMP_MAP_TO | 7772 MappableExprsHandler::OMP_MAP_FROM; 7773 } 7774 7775 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { 7776 // Rotate by getFlagMemberOffset() bits. 7777 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) 7778 << getFlagMemberOffset()); 7779 } 7780 7781 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, 7782 OpenMPOffloadMappingFlags MemberOfFlag) { 7783 // If the entry is PTR_AND_OBJ but has not been marked with the special 7784 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be 7785 // marked as MEMBER_OF. 7786 if ((Flags & OMP_MAP_PTR_AND_OBJ) && 7787 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) 7788 return; 7789 7790 // Reset the placeholder value to prepare the flag for the assignment of the 7791 // proper MEMBER_OF value. 7792 Flags &= ~OMP_MAP_MEMBER_OF; 7793 Flags |= MemberOfFlag; 7794 } 7795 7796 void getPlainLayout(const CXXRecordDecl *RD, 7797 llvm::SmallVectorImpl<const FieldDecl *> &Layout, 7798 bool AsBase) const { 7799 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD); 7800 7801 llvm::StructType *St = 7802 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType(); 7803 7804 unsigned NumElements = St->getNumElements(); 7805 llvm::SmallVector< 7806 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4> 7807 RecordLayout(NumElements); 7808 7809 // Fill bases. 
7810 for (const auto &I : RD->bases()) { 7811 if (I.isVirtual()) 7812 continue; 7813 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7814 // Ignore empty bases. 7815 if (Base->isEmpty() || CGF.getContext() 7816 .getASTRecordLayout(Base) 7817 .getNonVirtualSize() 7818 .isZero()) 7819 continue; 7820 7821 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); 7822 RecordLayout[FieldIndex] = Base; 7823 } 7824 // Fill in virtual bases. 7825 for (const auto &I : RD->vbases()) { 7826 const auto *Base = I.getType()->getAsCXXRecordDecl(); 7827 // Ignore empty bases. 7828 if (Base->isEmpty()) 7829 continue; 7830 unsigned FieldIndex = RL.getVirtualBaseIndex(Base); 7831 if (RecordLayout[FieldIndex]) 7832 continue; 7833 RecordLayout[FieldIndex] = Base; 7834 } 7835 // Fill in all the fields. 7836 assert(!RD->isUnion() && "Unexpected union."); 7837 for (const auto *Field : RD->fields()) { 7838 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we 7839 // will fill in later.) 7840 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { 7841 unsigned FieldIndex = RL.getLLVMFieldNo(Field); 7842 RecordLayout[FieldIndex] = Field; 7843 } 7844 } 7845 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *> 7846 &Data : RecordLayout) { 7847 if (Data.isNull()) 7848 continue; 7849 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>()) 7850 getPlainLayout(Base, Layout, /*AsBase=*/true); 7851 else 7852 Layout.push_back(Data.get<const FieldDecl *>()); 7853 } 7854 } 7855 7856 public: 7857 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) 7858 : CurDir(&Dir), CGF(CGF) { 7859 // Extract firstprivate clause information. 7860 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) 7861 for (const auto *D : C->varlists()) 7862 FirstPrivateDecls.try_emplace( 7863 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); 7864 // Extract device pointer clause information. 
7865 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) 7866 for (auto L : C->component_lists()) 7867 DevPointersMap[L.first].push_back(L.second); 7868 } 7869 7870 /// Constructor for the declare mapper directive. 7871 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) 7872 : CurDir(&Dir), CGF(CGF) {} 7873 7874 /// Generate code for the combined entry if we have a partially mapped struct 7875 /// and take care of the mapping flags of the arguments corresponding to 7876 /// individual struct members. 7877 void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers, 7878 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7879 MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes, 7880 const StructRangeInfoTy &PartialStruct) const { 7881 // Base is the base of the struct 7882 BasePointers.push_back(PartialStruct.Base.getPointer()); 7883 // Pointer is the address of the lowest element 7884 llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); 7885 Pointers.push_back(LB); 7886 // Size is (addr of {highest+1} element) - (addr of lowest element) 7887 llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); 7888 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); 7889 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); 7890 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); 7891 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); 7892 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, 7893 /*isSigned=*/false); 7894 Sizes.push_back(Size); 7895 // Map type is always TARGET_PARAM 7896 Types.push_back(OMP_MAP_TARGET_PARAM); 7897 // Remove TARGET_PARAM flag from the first element 7898 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM; 7899 7900 // All other current entries will be MEMBER_OF the combined entry 7901 // (except for PTR_AND_OBJ entries which do not have a placeholder value 7902 // 0xFFFF in the MEMBER_OF field). 
7903 OpenMPOffloadMappingFlags MemberOfFlag = 7904 getMemberOfFlag(BasePointers.size() - 1); 7905 for (auto &M : CurTypes) 7906 setCorrectMemberOfFlag(M, MemberOfFlag); 7907 } 7908 7909 /// Generate all the base pointers, section pointers, sizes and map 7910 /// types for the extracted mappable expressions. Also, for each item that 7911 /// relates with a device pointer, a pair of the relevant declaration and 7912 /// index where it occurs is appended to the device pointers info array. 7913 void generateAllInfo(MapBaseValuesArrayTy &BasePointers, 7914 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 7915 MapFlagsArrayTy &Types) const { 7916 // We have to process the component lists that relate with the same 7917 // declaration in a single chunk so that we can generate the map flags 7918 // correctly. Therefore, we organize all lists in a map. 7919 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 7920 7921 // Helper function to fill the information map for the different supported 7922 // clauses. 7923 auto &&InfoGen = [&Info]( 7924 const ValueDecl *D, 7925 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 7926 OpenMPMapClauseKind MapType, 7927 ArrayRef<OpenMPMapModifierKind> MapModifiers, 7928 bool ReturnDevicePointer, bool IsImplicit) { 7929 const ValueDecl *VD = 7930 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 7931 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 7932 IsImplicit); 7933 }; 7934 7935 assert(CurDir.is<const OMPExecutableDirective *>() && 7936 "Expect a executable directive"); 7937 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 7938 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) 7939 for (const auto &L : C->component_lists()) { 7940 InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(), 7941 /*ReturnDevicePointer=*/false, C->isImplicit()); 7942 } 7943 for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) 7944 for (const auto &L : C->component_lists()) { 7945 InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None, 7946 /*ReturnDevicePointer=*/false, C->isImplicit()); 7947 } 7948 for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) 7949 for (const auto &L : C->component_lists()) { 7950 InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None, 7951 /*ReturnDevicePointer=*/false, C->isImplicit()); 7952 } 7953 7954 // Look at the use_device_ptr clause information and mark the existing map 7955 // entries as such. If there is no map information for an entry in the 7956 // use_device_ptr list, we create one with map type 'alloc' and zero size 7957 // section. It is the user fault if that was not mapped before. If there is 7958 // no map information and the pointer is a struct member, then we defer the 7959 // emission of that entry until the whole struct has been processed. 
7960 llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> 7961 DeferredInfo; 7962 7963 for (const auto *C : 7964 CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { 7965 for (const auto &L : C->component_lists()) { 7966 assert(!L.second.empty() && "Not expecting empty list of components!"); 7967 const ValueDecl *VD = L.second.back().getAssociatedDeclaration(); 7968 VD = cast<ValueDecl>(VD->getCanonicalDecl()); 7969 const Expr *IE = L.second.back().getAssociatedExpression(); 7970 // If the first component is a member expression, we have to look into 7971 // 'this', which maps to null in the map of map information. Otherwise 7972 // look directly for the information. 7973 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); 7974 7975 // We potentially have map information for this declaration already. 7976 // Look for the first set of components that refer to it. 7977 if (It != Info.end()) { 7978 auto CI = std::find_if( 7979 It->second.begin(), It->second.end(), [VD](const MapInfo &MI) { 7980 return MI.Components.back().getAssociatedDeclaration() == VD; 7981 }); 7982 // If we found a map entry, signal that the pointer has to be returned 7983 // and move on to the next declaration. 7984 if (CI != It->second.end()) { 7985 CI->ReturnDevicePointer = true; 7986 continue; 7987 } 7988 } 7989 7990 // We didn't find any match in our map information - generate a zero 7991 // size array section - if the pointer is a struct member we defer this 7992 // action until the whole struct has been processed. 7993 if (isa<MemberExpr>(IE)) { 7994 // Insert the pointer into Info to be processed by 7995 // generateInfoForComponentList. Because it is a member pointer 7996 // without a pointee, no entry will be generated for it, therefore 7997 // we need to generate one after the whole struct has been processed. 
7998 // Nonetheless, generateInfoForComponentList must be called to take 7999 // the pointer into account for the calculation of the range of the 8000 // partial struct. 8001 InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None, 8002 /*ReturnDevicePointer=*/false, C->isImplicit()); 8003 DeferredInfo[nullptr].emplace_back(IE, VD); 8004 } else { 8005 llvm::Value *Ptr = 8006 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); 8007 BasePointers.emplace_back(Ptr, VD); 8008 Pointers.push_back(Ptr); 8009 Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8010 Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM); 8011 } 8012 } 8013 } 8014 8015 for (const auto &M : Info) { 8016 // We need to know when we generate information for the first component 8017 // associated with a capture, because the mapping flags depend on it. 8018 bool IsFirstComponentList = true; 8019 8020 // Temporary versions of arrays 8021 MapBaseValuesArrayTy CurBasePointers; 8022 MapValuesArrayTy CurPointers; 8023 MapValuesArrayTy CurSizes; 8024 MapFlagsArrayTy CurTypes; 8025 StructRangeInfoTy PartialStruct; 8026 8027 for (const MapInfo &L : M.second) { 8028 assert(!L.Components.empty() && 8029 "Not expecting declaration with no component lists."); 8030 8031 // Remember the current base pointer index. 8032 unsigned CurrentBasePointersIdx = CurBasePointers.size(); 8033 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8034 CurBasePointers, CurPointers, CurSizes, 8035 CurTypes, PartialStruct, 8036 IsFirstComponentList, L.IsImplicit); 8037 8038 // If this entry relates with a device pointer, set the relevant 8039 // declaration and add the 'return pointer' flag. 
8040 if (L.ReturnDevicePointer) { 8041 assert(CurBasePointers.size() > CurrentBasePointersIdx && 8042 "Unexpected number of mapped base pointers."); 8043 8044 const ValueDecl *RelevantVD = 8045 L.Components.back().getAssociatedDeclaration(); 8046 assert(RelevantVD && 8047 "No relevant declaration related with device pointer??"); 8048 8049 CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD); 8050 CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; 8051 } 8052 IsFirstComponentList = false; 8053 } 8054 8055 // Append any pending zero-length pointers which are struct members and 8056 // used with use_device_ptr. 8057 auto CI = DeferredInfo.find(M.first); 8058 if (CI != DeferredInfo.end()) { 8059 for (const DeferredDevicePtrEntryTy &L : CI->second) { 8060 llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer(); 8061 llvm::Value *Ptr = this->CGF.EmitLoadOfScalar( 8062 this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); 8063 CurBasePointers.emplace_back(BasePtr, L.VD); 8064 CurPointers.push_back(Ptr); 8065 CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty)); 8066 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder 8067 // value MEMBER_OF=FFFF so that the entry is later updated with the 8068 // correct value of MEMBER_OF. 8069 CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | 8070 OMP_MAP_MEMBER_OF); 8071 } 8072 } 8073 8074 // If there is an entry in PartialStruct it means we have a struct with 8075 // individual members mapped. Emit an extra combined entry. 8076 if (PartialStruct.Base.isValid()) 8077 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8078 PartialStruct); 8079 8080 // We need to append the results of this capture to what we already have. 
8081 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8082 Pointers.append(CurPointers.begin(), CurPointers.end()); 8083 Sizes.append(CurSizes.begin(), CurSizes.end()); 8084 Types.append(CurTypes.begin(), CurTypes.end()); 8085 } 8086 } 8087 8088 /// Generate all the base pointers, section pointers, sizes and map types for 8089 /// the extracted map clauses of user-defined mapper. 8090 void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers, 8091 MapValuesArrayTy &Pointers, 8092 MapValuesArrayTy &Sizes, 8093 MapFlagsArrayTy &Types) const { 8094 assert(CurDir.is<const OMPDeclareMapperDecl *>() && 8095 "Expect a declare mapper directive"); 8096 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); 8097 // We have to process the component lists that relate with the same 8098 // declaration in a single chunk so that we can generate the map flags 8099 // correctly. Therefore, we organize all lists in a map. 8100 llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; 8101 8102 // Helper function to fill the information map for the different supported 8103 // clauses. 8104 auto &&InfoGen = [&Info]( 8105 const ValueDecl *D, 8106 OMPClauseMappableExprCommon::MappableExprComponentListRef L, 8107 OpenMPMapClauseKind MapType, 8108 ArrayRef<OpenMPMapModifierKind> MapModifiers, 8109 bool ReturnDevicePointer, bool IsImplicit) { 8110 const ValueDecl *VD = 8111 D ? 
cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; 8112 Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, 8113 IsImplicit); 8114 }; 8115 8116 for (const auto *C : CurMapperDir->clauselists()) { 8117 const auto *MC = cast<OMPMapClause>(C); 8118 for (const auto &L : MC->component_lists()) { 8119 InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(), 8120 /*ReturnDevicePointer=*/false, MC->isImplicit()); 8121 } 8122 } 8123 8124 for (const auto &M : Info) { 8125 // We need to know when we generate information for the first component 8126 // associated with a capture, because the mapping flags depend on it. 8127 bool IsFirstComponentList = true; 8128 8129 // Temporary versions of arrays 8130 MapBaseValuesArrayTy CurBasePointers; 8131 MapValuesArrayTy CurPointers; 8132 MapValuesArrayTy CurSizes; 8133 MapFlagsArrayTy CurTypes; 8134 StructRangeInfoTy PartialStruct; 8135 8136 for (const MapInfo &L : M.second) { 8137 assert(!L.Components.empty() && 8138 "Not expecting declaration with no component lists."); 8139 generateInfoForComponentList(L.MapType, L.MapModifiers, L.Components, 8140 CurBasePointers, CurPointers, CurSizes, 8141 CurTypes, PartialStruct, 8142 IsFirstComponentList, L.IsImplicit); 8143 IsFirstComponentList = false; 8144 } 8145 8146 // If there is an entry in PartialStruct it means we have a struct with 8147 // individual members mapped. Emit an extra combined entry. 8148 if (PartialStruct.Base.isValid()) 8149 emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes, 8150 PartialStruct); 8151 8152 // We need to append the results of this capture to what we already have. 8153 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 8154 Pointers.append(CurPointers.begin(), CurPointers.end()); 8155 Sizes.append(CurSizes.begin(), CurSizes.end()); 8156 Types.append(CurTypes.begin(), CurTypes.end()); 8157 } 8158 } 8159 8160 /// Emit capture info for lambdas for variables captured by reference. 
8161 void generateInfoForLambdaCaptures( 8162 const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers, 8163 MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes, 8164 MapFlagsArrayTy &Types, 8165 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { 8166 const auto *RD = VD->getType() 8167 .getCanonicalType() 8168 .getNonReferenceType() 8169 ->getAsCXXRecordDecl(); 8170 if (!RD || !RD->isLambda()) 8171 return; 8172 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); 8173 LValue VDLVal = CGF.MakeAddrLValue( 8174 VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); 8175 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; 8176 FieldDecl *ThisCapture = nullptr; 8177 RD->getCaptureFields(Captures, ThisCapture); 8178 if (ThisCapture) { 8179 LValue ThisLVal = 8180 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture); 8181 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture); 8182 LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer()); 8183 BasePointers.push_back(ThisLVal.getPointer()); 8184 Pointers.push_back(ThisLValVal.getPointer()); 8185 Sizes.push_back( 8186 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8187 CGF.Int64Ty, /*isSigned=*/true)); 8188 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8189 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8190 } 8191 for (const LambdaCapture &LC : RD->captures()) { 8192 if (!LC.capturesVariable()) 8193 continue; 8194 const VarDecl *VD = LC.getCapturedVar(); 8195 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) 8196 continue; 8197 auto It = Captures.find(VD); 8198 assert(It != Captures.end() && "Found lambda capture without field."); 8199 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second); 8200 if (LC.getCaptureKind() == LCK_ByRef) { 8201 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second); 8202 LambdaPointers.try_emplace(VarLVal.getPointer(), 
VDLVal.getPointer()); 8203 BasePointers.push_back(VarLVal.getPointer()); 8204 Pointers.push_back(VarLValVal.getPointer()); 8205 Sizes.push_back(CGF.Builder.CreateIntCast( 8206 CGF.getTypeSize( 8207 VD->getType().getCanonicalType().getNonReferenceType()), 8208 CGF.Int64Ty, /*isSigned=*/true)); 8209 } else { 8210 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation()); 8211 LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer()); 8212 BasePointers.push_back(VarLVal.getPointer()); 8213 Pointers.push_back(VarRVal.getScalarVal()); 8214 Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); 8215 } 8216 Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8217 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); 8218 } 8219 } 8220 8221 /// Set correct indices for lambdas captures. 8222 void adjustMemberOfForLambdaCaptures( 8223 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, 8224 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, 8225 MapFlagsArrayTy &Types) const { 8226 for (unsigned I = 0, E = Types.size(); I < E; ++I) { 8227 // Set correct member_of idx for all implicit lambda captures. 8228 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | 8229 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) 8230 continue; 8231 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); 8232 assert(BasePtr && "Unable to find base lambda address."); 8233 int TgtIdx = -1; 8234 for (unsigned J = I; J > 0; --J) { 8235 unsigned Idx = J - 1; 8236 if (Pointers[Idx] != BasePtr) 8237 continue; 8238 TgtIdx = Idx; 8239 break; 8240 } 8241 assert(TgtIdx != -1 && "Unable to find parent lambda."); 8242 // All other current entries will be MEMBER_OF the combined entry 8243 // (except for PTR_AND_OBJ entries which do not have a placeholder value 8244 // 0xFFFF in the MEMBER_OF field). 
8245 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); 8246 setCorrectMemberOfFlag(Types[I], MemberOfFlag); 8247 } 8248 } 8249 8250 /// Generate the base pointers, section pointers, sizes and map types 8251 /// associated to a given capture. 8252 void generateInfoForCapture(const CapturedStmt::Capture *Cap, 8253 llvm::Value *Arg, 8254 MapBaseValuesArrayTy &BasePointers, 8255 MapValuesArrayTy &Pointers, 8256 MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types, 8257 StructRangeInfoTy &PartialStruct) const { 8258 assert(!Cap->capturesVariableArrayType() && 8259 "Not expecting to generate map info for a variable array type!"); 8260 8261 // We need to know when we generating information for the first component 8262 const ValueDecl *VD = Cap->capturesThis() 8263 ? nullptr 8264 : Cap->getCapturedVar()->getCanonicalDecl(); 8265 8266 // If this declaration appears in a is_device_ptr clause we just have to 8267 // pass the pointer by value. If it is a reference to a declaration, we just 8268 // pass its value. 
8269 if (DevPointersMap.count(VD)) { 8270 BasePointers.emplace_back(Arg, VD); 8271 Pointers.push_back(Arg); 8272 Sizes.push_back( 8273 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), 8274 CGF.Int64Ty, /*isSigned=*/true)); 8275 Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM); 8276 return; 8277 } 8278 8279 using MapData = 8280 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef, 8281 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>; 8282 SmallVector<MapData, 4> DeclComponentLists; 8283 assert(CurDir.is<const OMPExecutableDirective *>() && 8284 "Expect a executable directive"); 8285 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8286 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8287 for (const auto &L : C->decl_component_lists(VD)) { 8288 assert(L.first == VD && 8289 "We got information for the wrong declaration??"); 8290 assert(!L.second.empty() && 8291 "Not expecting declaration with no component lists."); 8292 DeclComponentLists.emplace_back(L.second, C->getMapType(), 8293 C->getMapTypeModifiers(), 8294 C->isImplicit()); 8295 } 8296 } 8297 8298 // Find overlapping elements (including the offset from the base element). 
8299 llvm::SmallDenseMap< 8300 const MapData *, 8301 llvm::SmallVector< 8302 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>, 8303 4> 8304 OverlappedData; 8305 size_t Count = 0; 8306 for (const MapData &L : DeclComponentLists) { 8307 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8308 OpenMPMapClauseKind MapType; 8309 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8310 bool IsImplicit; 8311 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8312 ++Count; 8313 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { 8314 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; 8315 std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1; 8316 auto CI = Components.rbegin(); 8317 auto CE = Components.rend(); 8318 auto SI = Components1.rbegin(); 8319 auto SE = Components1.rend(); 8320 for (; CI != CE && SI != SE; ++CI, ++SI) { 8321 if (CI->getAssociatedExpression()->getStmtClass() != 8322 SI->getAssociatedExpression()->getStmtClass()) 8323 break; 8324 // Are we dealing with different variables/fields? 8325 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) 8326 break; 8327 } 8328 // Found overlapping if, at least for one component, reached the head of 8329 // the components list. 8330 if (CI == CE || SI == SE) { 8331 assert((CI != CE || SI != SE) && 8332 "Unexpected full match of the mapping components."); 8333 const MapData &BaseData = CI == CE ? L : L1; 8334 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = 8335 SI == SE ? Components : Components1; 8336 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData); 8337 OverlappedElements.getSecond().push_back(SubData); 8338 } 8339 } 8340 } 8341 // Sort the overlapped elements for each item. 
8342 llvm::SmallVector<const FieldDecl *, 4> Layout; 8343 if (!OverlappedData.empty()) { 8344 if (const auto *CRD = 8345 VD->getType().getCanonicalType()->getAsCXXRecordDecl()) 8346 getPlainLayout(CRD, Layout, /*AsBase=*/false); 8347 else { 8348 const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); 8349 Layout.append(RD->field_begin(), RD->field_end()); 8350 } 8351 } 8352 for (auto &Pair : OverlappedData) { 8353 llvm::sort( 8354 Pair.getSecond(), 8355 [&Layout]( 8356 OMPClauseMappableExprCommon::MappableExprComponentListRef First, 8357 OMPClauseMappableExprCommon::MappableExprComponentListRef 8358 Second) { 8359 auto CI = First.rbegin(); 8360 auto CE = First.rend(); 8361 auto SI = Second.rbegin(); 8362 auto SE = Second.rend(); 8363 for (; CI != CE && SI != SE; ++CI, ++SI) { 8364 if (CI->getAssociatedExpression()->getStmtClass() != 8365 SI->getAssociatedExpression()->getStmtClass()) 8366 break; 8367 // Are we dealing with different variables/fields? 8368 if (CI->getAssociatedDeclaration() != 8369 SI->getAssociatedDeclaration()) 8370 break; 8371 } 8372 8373 // Lists contain the same elements. 8374 if (CI == CE && SI == SE) 8375 return false; 8376 8377 // List with less elements is less than list with more elements. 8378 if (CI == CE || SI == SE) 8379 return CI == CE; 8380 8381 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration()); 8382 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); 8383 if (FD1->getParent() == FD2->getParent()) 8384 return FD1->getFieldIndex() < FD2->getFieldIndex(); 8385 const auto It = 8386 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { 8387 return FD == FD1 || FD == FD2; 8388 }); 8389 return *It == FD1; 8390 }); 8391 } 8392 8393 // Associated with a capture, because the mapping flags depend on it. 8394 // Go through all of the elements with the overlapped elements. 
8395 for (const auto &Pair : OverlappedData) { 8396 const MapData &L = *Pair.getFirst(); 8397 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8398 OpenMPMapClauseKind MapType; 8399 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8400 bool IsImplicit; 8401 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8402 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> 8403 OverlappedComponents = Pair.getSecond(); 8404 bool IsFirstComponentList = true; 8405 generateInfoForComponentList(MapType, MapModifiers, Components, 8406 BasePointers, Pointers, Sizes, Types, 8407 PartialStruct, IsFirstComponentList, 8408 IsImplicit, OverlappedComponents); 8409 } 8410 // Go through other elements without overlapped elements. 8411 bool IsFirstComponentList = OverlappedData.empty(); 8412 for (const MapData &L : DeclComponentLists) { 8413 OMPClauseMappableExprCommon::MappableExprComponentListRef Components; 8414 OpenMPMapClauseKind MapType; 8415 ArrayRef<OpenMPMapModifierKind> MapModifiers; 8416 bool IsImplicit; 8417 std::tie(Components, MapType, MapModifiers, IsImplicit) = L; 8418 auto It = OverlappedData.find(&L); 8419 if (It == OverlappedData.end()) 8420 generateInfoForComponentList(MapType, MapModifiers, Components, 8421 BasePointers, Pointers, Sizes, Types, 8422 PartialStruct, IsFirstComponentList, 8423 IsImplicit); 8424 IsFirstComponentList = false; 8425 } 8426 } 8427 8428 /// Generate the base pointers, section pointers, sizes and map types 8429 /// associated with the declare target link variables. 
8430 void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers, 8431 MapValuesArrayTy &Pointers, 8432 MapValuesArrayTy &Sizes, 8433 MapFlagsArrayTy &Types) const { 8434 assert(CurDir.is<const OMPExecutableDirective *>() && 8435 "Expect a executable directive"); 8436 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); 8437 // Map other list items in the map clause which are not captured variables 8438 // but "declare target link" global variables. 8439 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { 8440 for (const auto &L : C->component_lists()) { 8441 if (!L.first) 8442 continue; 8443 const auto *VD = dyn_cast<VarDecl>(L.first); 8444 if (!VD) 8445 continue; 8446 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 8447 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 8448 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || 8449 !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) 8450 continue; 8451 StructRangeInfoTy PartialStruct; 8452 generateInfoForComponentList( 8453 C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers, 8454 Pointers, Sizes, Types, PartialStruct, 8455 /*IsFirstComponentList=*/true, C->isImplicit()); 8456 assert(!PartialStruct.Base.isValid() && 8457 "No partial structs for declare target link expected."); 8458 } 8459 } 8460 } 8461 8462 /// Generate the default map information for a given capture \a CI, 8463 /// record field declaration \a RI and captured value \a CV. 8464 void generateDefaultMapInfo(const CapturedStmt::Capture &CI, 8465 const FieldDecl &RI, llvm::Value *CV, 8466 MapBaseValuesArrayTy &CurBasePointers, 8467 MapValuesArrayTy &CurPointers, 8468 MapValuesArrayTy &CurSizes, 8469 MapFlagsArrayTy &CurMapTypes) const { 8470 bool IsImplicit = true; 8471 // Do the default mapping. 
8472 if (CI.capturesThis()) { 8473 CurBasePointers.push_back(CV); 8474 CurPointers.push_back(CV); 8475 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); 8476 CurSizes.push_back( 8477 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), 8478 CGF.Int64Ty, /*isSigned=*/true)); 8479 // Default map type. 8480 CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM); 8481 } else if (CI.capturesVariableByCopy()) { 8482 CurBasePointers.push_back(CV); 8483 CurPointers.push_back(CV); 8484 if (!RI.getType()->isAnyPointerType()) { 8485 // We have to signal to the runtime captures passed by value that are 8486 // not pointers. 8487 CurMapTypes.push_back(OMP_MAP_LITERAL); 8488 CurSizes.push_back(CGF.Builder.CreateIntCast( 8489 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); 8490 } else { 8491 // Pointers are implicitly mapped with a zero size and no flags 8492 // (other than first map that is added for all implicit maps). 8493 CurMapTypes.push_back(OMP_MAP_NONE); 8494 CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); 8495 } 8496 const VarDecl *VD = CI.getCapturedVar(); 8497 auto I = FirstPrivateDecls.find(VD); 8498 if (I != FirstPrivateDecls.end()) 8499 IsImplicit = I->getSecond(); 8500 } else { 8501 assert(CI.capturesVariable() && "Expected captured reference."); 8502 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr()); 8503 QualType ElementType = PtrTy->getPointeeType(); 8504 CurSizes.push_back(CGF.Builder.CreateIntCast( 8505 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true)); 8506 // The default map type for a scalar/complex type is 'to' because by 8507 // default the value doesn't have to be retrieved. For an aggregate 8508 // type, the default is 'tofrom'. 
8509 CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI)); 8510 const VarDecl *VD = CI.getCapturedVar(); 8511 auto I = FirstPrivateDecls.find(VD); 8512 if (I != FirstPrivateDecls.end() && 8513 VD->getType().isConstant(CGF.getContext())) { 8514 llvm::Constant *Addr = 8515 CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); 8516 // Copy the value of the original variable to the new global copy. 8517 CGF.Builder.CreateMemCpy( 8518 CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(), 8519 Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), 8520 CurSizes.back(), /*IsVolatile=*/false); 8521 // Use new global variable as the base pointers. 8522 CurBasePointers.push_back(Addr); 8523 CurPointers.push_back(Addr); 8524 } else { 8525 CurBasePointers.push_back(CV); 8526 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { 8527 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( 8528 CV, ElementType, CGF.getContext().getDeclAlign(VD), 8529 AlignmentSource::Decl)); 8530 CurPointers.push_back(PtrAddr.getPointer()); 8531 } else { 8532 CurPointers.push_back(CV); 8533 } 8534 } 8535 if (I != FirstPrivateDecls.end()) 8536 IsImplicit = I->getSecond(); 8537 } 8538 // Every default map produces a single argument which is a target parameter. 8539 CurMapTypes.back() |= OMP_MAP_TARGET_PARAM; 8540 8541 // Add flag stating this is an implicit map. 8542 if (IsImplicit) 8543 CurMapTypes.back() |= OMP_MAP_IMPLICIT; 8544 } 8545 }; 8546 } // anonymous namespace 8547 8548 /// Emit the arrays used to pass the captures and map information to the 8549 /// offloading runtime library. If there is no map or capture information, 8550 /// return nullptr by reference. 
8551 static void 8552 emitOffloadingArrays(CodeGenFunction &CGF, 8553 MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, 8554 MappableExprsHandler::MapValuesArrayTy &Pointers, 8555 MappableExprsHandler::MapValuesArrayTy &Sizes, 8556 MappableExprsHandler::MapFlagsArrayTy &MapTypes, 8557 CGOpenMPRuntime::TargetDataInfo &Info) { 8558 CodeGenModule &CGM = CGF.CGM; 8559 ASTContext &Ctx = CGF.getContext(); 8560 8561 // Reset the array information. 8562 Info.clearArrayInfo(); 8563 Info.NumberOfPtrs = BasePointers.size(); 8564 8565 if (Info.NumberOfPtrs) { 8566 // Detect if we have any capture size requiring runtime evaluation of the 8567 // size so that a constant array could be eventually used. 8568 bool hasRuntimeEvaluationCaptureSize = false; 8569 for (llvm::Value *S : Sizes) 8570 if (!isa<llvm::Constant>(S)) { 8571 hasRuntimeEvaluationCaptureSize = true; 8572 break; 8573 } 8574 8575 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); 8576 QualType PointerArrayType = Ctx.getConstantArrayType( 8577 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, 8578 /*IndexTypeQuals=*/0); 8579 8580 Info.BasePointersArray = 8581 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); 8582 Info.PointersArray = 8583 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); 8584 8585 // If we don't have any VLA types or other types that require runtime 8586 // evaluation, we can use a constant array for the map sizes, otherwise we 8587 // need to fill up the arrays as we do for the pointers. 
8588 QualType Int64Ty = 8589 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 8590 if (hasRuntimeEvaluationCaptureSize) { 8591 QualType SizeArrayType = Ctx.getConstantArrayType( 8592 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, 8593 /*IndexTypeQuals=*/0); 8594 Info.SizesArray = 8595 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); 8596 } else { 8597 // We expect all the sizes to be constant, so we collect them to create 8598 // a constant array. 8599 SmallVector<llvm::Constant *, 16> ConstSizes; 8600 for (llvm::Value *S : Sizes) 8601 ConstSizes.push_back(cast<llvm::Constant>(S)); 8602 8603 auto *SizesArrayInit = llvm::ConstantArray::get( 8604 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); 8605 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); 8606 auto *SizesArrayGbl = new llvm::GlobalVariable( 8607 CGM.getModule(), SizesArrayInit->getType(), 8608 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8609 SizesArrayInit, Name); 8610 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8611 Info.SizesArray = SizesArrayGbl; 8612 } 8613 8614 // The map types are always constant so we don't need to generate code to 8615 // fill arrays. Instead, we create an array constant. 
8616 SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0); 8617 llvm::copy(MapTypes, Mapping.begin()); 8618 llvm::Constant *MapTypesArrayInit = 8619 llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); 8620 std::string MaptypesName = 8621 CGM.getOpenMPRuntime().getName({"offload_maptypes"}); 8622 auto *MapTypesArrayGbl = new llvm::GlobalVariable( 8623 CGM.getModule(), MapTypesArrayInit->getType(), 8624 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, 8625 MapTypesArrayInit, MaptypesName); 8626 MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); 8627 Info.MapTypesArray = MapTypesArrayGbl; 8628 8629 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { 8630 llvm::Value *BPVal = *BasePointers[I]; 8631 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( 8632 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8633 Info.BasePointersArray, 0, I); 8634 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8635 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8636 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8637 CGF.Builder.CreateStore(BPVal, BPAddr); 8638 8639 if (Info.requiresDevicePointerInfo()) 8640 if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl()) 8641 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); 8642 8643 llvm::Value *PVal = Pointers[I]; 8644 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( 8645 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8646 Info.PointersArray, 0, I); 8647 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 8648 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); 8649 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); 8650 CGF.Builder.CreateStore(PVal, PAddr); 8651 8652 if (hasRuntimeEvaluationCaptureSize) { 8653 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( 8654 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8655 Info.SizesArray, 8656 /*Idx0=*/0, 8657 /*Idx1=*/I); 8658 Address SAddr(S, 
Ctx.getTypeAlignInChars(Int64Ty)); 8659 CGF.Builder.CreateStore( 8660 CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true), 8661 SAddr); 8662 } 8663 } 8664 } 8665 } 8666 8667 /// Emit the arguments to be passed to the runtime library based on the 8668 /// arrays of pointers, sizes and map types. 8669 static void emitOffloadingArraysArgument( 8670 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, 8671 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, 8672 llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) { 8673 CodeGenModule &CGM = CGF.CGM; 8674 if (Info.NumberOfPtrs) { 8675 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8676 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8677 Info.BasePointersArray, 8678 /*Idx0=*/0, /*Idx1=*/0); 8679 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8680 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), 8681 Info.PointersArray, 8682 /*Idx0=*/0, 8683 /*Idx1=*/0); 8684 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8685 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, 8686 /*Idx0=*/0, /*Idx1=*/0); 8687 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( 8688 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), 8689 Info.MapTypesArray, 8690 /*Idx0=*/0, 8691 /*Idx1=*/0); 8692 } else { 8693 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8694 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); 8695 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8696 MapTypesArrayArg = 8697 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); 8698 } 8699 } 8700 8701 /// Check for inner distribute directive. 
8702 static const OMPExecutableDirective * 8703 getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { 8704 const auto *CS = D.getInnermostCapturedStmt(); 8705 const auto *Body = 8706 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); 8707 const Stmt *ChildStmt = 8708 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8709 8710 if (const auto *NestedDir = 8711 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8712 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); 8713 switch (D.getDirectiveKind()) { 8714 case OMPD_target: 8715 if (isOpenMPDistributeDirective(DKind)) 8716 return NestedDir; 8717 if (DKind == OMPD_teams) { 8718 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( 8719 /*IgnoreCaptured=*/true); 8720 if (!Body) 8721 return nullptr; 8722 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); 8723 if (const auto *NND = 8724 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { 8725 DKind = NND->getDirectiveKind(); 8726 if (isOpenMPDistributeDirective(DKind)) 8727 return NND; 8728 } 8729 } 8730 return nullptr; 8731 case OMPD_target_teams: 8732 if (isOpenMPDistributeDirective(DKind)) 8733 return NestedDir; 8734 return nullptr; 8735 case OMPD_target_parallel: 8736 case OMPD_target_simd: 8737 case OMPD_target_parallel_for: 8738 case OMPD_target_parallel_for_simd: 8739 return nullptr; 8740 case OMPD_target_teams_distribute: 8741 case OMPD_target_teams_distribute_simd: 8742 case OMPD_target_teams_distribute_parallel_for: 8743 case OMPD_target_teams_distribute_parallel_for_simd: 8744 case OMPD_parallel: 8745 case OMPD_for: 8746 case OMPD_parallel_for: 8747 case OMPD_parallel_sections: 8748 case OMPD_for_simd: 8749 case OMPD_parallel_for_simd: 8750 case OMPD_cancel: 8751 case OMPD_cancellation_point: 8752 case OMPD_ordered: 8753 case OMPD_threadprivate: 8754 case OMPD_allocate: 8755 case OMPD_task: 8756 case OMPD_simd: 8757 case OMPD_sections: 8758 case OMPD_section: 8759 case 
OMPD_single: 8760 case OMPD_master: 8761 case OMPD_critical: 8762 case OMPD_taskyield: 8763 case OMPD_barrier: 8764 case OMPD_taskwait: 8765 case OMPD_taskgroup: 8766 case OMPD_atomic: 8767 case OMPD_flush: 8768 case OMPD_teams: 8769 case OMPD_target_data: 8770 case OMPD_target_exit_data: 8771 case OMPD_target_enter_data: 8772 case OMPD_distribute: 8773 case OMPD_distribute_simd: 8774 case OMPD_distribute_parallel_for: 8775 case OMPD_distribute_parallel_for_simd: 8776 case OMPD_teams_distribute: 8777 case OMPD_teams_distribute_simd: 8778 case OMPD_teams_distribute_parallel_for: 8779 case OMPD_teams_distribute_parallel_for_simd: 8780 case OMPD_target_update: 8781 case OMPD_declare_simd: 8782 case OMPD_declare_variant: 8783 case OMPD_declare_target: 8784 case OMPD_end_declare_target: 8785 case OMPD_declare_reduction: 8786 case OMPD_declare_mapper: 8787 case OMPD_taskloop: 8788 case OMPD_taskloop_simd: 8789 case OMPD_master_taskloop: 8790 case OMPD_parallel_master_taskloop: 8791 case OMPD_requires: 8792 case OMPD_unknown: 8793 llvm_unreachable("Unexpected directive."); 8794 } 8795 } 8796 8797 return nullptr; 8798 } 8799 8800 /// Emit the user-defined mapper function. The code generation follows the 8801 /// pattern in the example below. 8802 /// \code 8803 /// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle, 8804 /// void *base, void *begin, 8805 /// int64_t size, int64_t type) { 8806 /// // Allocate space for an array section first. 8807 /// if (size > 1 && !maptype.IsDelete) 8808 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8809 /// size*sizeof(Ty), clearToFrom(type)); 8810 /// // Map members. 
8811 /// for (unsigned i = 0; i < size; i++) { 8812 /// // For each component specified by this mapper: 8813 /// for (auto c : all_components) { 8814 /// if (c.hasMapper()) 8815 /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, 8816 /// c.arg_type); 8817 /// else 8818 /// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base, 8819 /// c.arg_begin, c.arg_size, c.arg_type); 8820 /// } 8821 /// } 8822 /// // Delete the array section. 8823 /// if (size > 1 && maptype.IsDelete) 8824 /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, 8825 /// size*sizeof(Ty), clearToFrom(type)); 8826 /// } 8827 /// \endcode 8828 void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, 8829 CodeGenFunction *CGF) { 8830 if (UDMMap.count(D) > 0) 8831 return; 8832 ASTContext &C = CGM.getContext(); 8833 QualType Ty = D->getType(); 8834 QualType PtrTy = C.getPointerType(Ty).withRestrict(); 8835 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 8836 auto *MapperVarDecl = 8837 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); 8838 SourceLocation Loc = D->getLocation(); 8839 CharUnits ElementSize = C.getTypeSizeInChars(Ty); 8840 8841 // Prepare mapper function arguments and attributes. 
8842 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8843 C.VoidPtrTy, ImplicitParamDecl::Other); 8844 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, 8845 ImplicitParamDecl::Other); 8846 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 8847 C.VoidPtrTy, ImplicitParamDecl::Other); 8848 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8849 ImplicitParamDecl::Other); 8850 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, 8851 ImplicitParamDecl::Other); 8852 FunctionArgList Args; 8853 Args.push_back(&HandleArg); 8854 Args.push_back(&BaseArg); 8855 Args.push_back(&BeginArg); 8856 Args.push_back(&SizeArg); 8857 Args.push_back(&TypeArg); 8858 const CGFunctionInfo &FnInfo = 8859 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); 8860 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); 8861 SmallString<64> TyStr; 8862 llvm::raw_svector_ostream Out(TyStr); 8863 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); 8864 std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 8865 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, 8866 Name, &CGM.getModule()); 8867 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); 8868 Fn->removeFnAttr(llvm::Attribute::OptimizeNone); 8869 // Start the mapper function code generation. 8870 CodeGenFunction MapperCGF(CGM); 8871 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); 8872 // Compute the starting and end addreses of array elements. 
8873 llvm::Value *Size = MapperCGF.EmitLoadOfScalar( 8874 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, 8875 C.getPointerType(Int64Ty), Loc); 8876 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( 8877 MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), 8878 CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); 8879 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); 8880 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( 8881 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, 8882 C.getPointerType(Int64Ty), Loc); 8883 // Prepare common arguments for array initiation and deletion. 8884 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( 8885 MapperCGF.GetAddrOfLocalVar(&HandleArg), 8886 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8887 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar( 8888 MapperCGF.GetAddrOfLocalVar(&BaseArg), 8889 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8890 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( 8891 MapperCGF.GetAddrOfLocalVar(&BeginArg), 8892 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); 8893 8894 // Emit array initiation if this is an array section and \p MapType indicates 8895 // that memory allocation is required. 8896 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); 8897 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 8898 ElementSize, HeadBB, /*IsInit=*/true); 8899 8900 // Emit a for loop to iterate through SizeArg of elements and map all of them. 8901 8902 // Emit the loop header block. 8903 MapperCGF.EmitBlock(HeadBB); 8904 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body"); 8905 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done"); 8906 // Evaluate whether the initial condition is satisfied. 
8907 llvm::Value *IsEmpty = 8908 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty"); 8909 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); 8910 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock(); 8911 8912 // Emit the loop body block. 8913 MapperCGF.EmitBlock(BodyBB); 8914 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( 8915 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); 8916 PtrPHI->addIncoming(PtrBegin, EntryBB); 8917 Address PtrCurrent = 8918 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) 8919 .getAlignment() 8920 .alignmentOfArrayElement(ElementSize)); 8921 // Privatize the declared variable of mapper to be the current array element. 8922 CodeGenFunction::OMPPrivateScope Scope(MapperCGF); 8923 Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { 8924 return MapperCGF 8925 .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) 8926 .getAddress(); 8927 }); 8928 (void)Scope.Privatize(); 8929 8930 // Get map clause information. Fill up the arrays with all mapped variables. 8931 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 8932 MappableExprsHandler::MapValuesArrayTy Pointers; 8933 MappableExprsHandler::MapValuesArrayTy Sizes; 8934 MappableExprsHandler::MapFlagsArrayTy MapTypes; 8935 MappableExprsHandler MEHandler(*D, MapperCGF); 8936 MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes); 8937 8938 // Call the runtime API __tgt_mapper_num_components to get the number of 8939 // pre-existing components. 8940 llvm::Value *OffloadingArgs[] = {Handle}; 8941 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall( 8942 createRuntimeFunction(OMPRTL__tgt_mapper_num_components), OffloadingArgs); 8943 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl( 8944 PreviousSize, 8945 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset())); 8946 8947 // Fill up the runtime mapper handle for all components. 
8948 for (unsigned I = 0; I < BasePointers.size(); ++I) { 8949 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( 8950 *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8951 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( 8952 Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); 8953 llvm::Value *CurSizeArg = Sizes[I]; 8954 8955 // Extract the MEMBER_OF field from the map type. 8956 llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); 8957 MapperCGF.EmitBlock(MemberBB); 8958 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]); 8959 llvm::Value *Member = MapperCGF.Builder.CreateAnd( 8960 OriMapType, 8961 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); 8962 llvm::BasicBlock *MemberCombineBB = 8963 MapperCGF.createBasicBlock("omp.member.combine"); 8964 llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); 8965 llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); 8966 MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); 8967 // Add the number of pre-existing components to the MEMBER_OF field if it 8968 // is valid. 8969 MapperCGF.EmitBlock(MemberCombineBB); 8970 llvm::Value *CombinedMember = 8971 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); 8972 // Do nothing if it is not a member of previous components. 8973 MapperCGF.EmitBlock(TypeBB); 8974 llvm::PHINode *MemberMapType = 8975 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); 8976 MemberMapType->addIncoming(OriMapType, MemberBB); 8977 MemberMapType->addIncoming(CombinedMember, MemberCombineBB); 8978 8979 // Combine the map type inherited from user-defined mapper with that 8980 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM 8981 // bits of the \a MapType, which is the input argument of the mapper 8982 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM 8983 // bits of MemberMapType. 
8984 // [OpenMP 5.0], 1.2.6. map-type decay. 8985 // | alloc | to | from | tofrom | release | delete 8986 // ---------------------------------------------------------- 8987 // alloc | alloc | alloc | alloc | alloc | release | delete 8988 // to | alloc | to | alloc | to | release | delete 8989 // from | alloc | alloc | from | from | release | delete 8990 // tofrom | alloc | to | from | tofrom | release | delete 8991 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( 8992 MapType, 8993 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | 8994 MappableExprsHandler::OMP_MAP_FROM)); 8995 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); 8996 llvm::BasicBlock *AllocElseBB = 8997 MapperCGF.createBasicBlock("omp.type.alloc.else"); 8998 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to"); 8999 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else"); 9000 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from"); 9001 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end"); 9002 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom); 9003 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB); 9004 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM. 9005 MapperCGF.EmitBlock(AllocBB); 9006 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( 9007 MemberMapType, 9008 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9009 MappableExprsHandler::OMP_MAP_FROM))); 9010 MapperCGF.Builder.CreateBr(EndBB); 9011 MapperCGF.EmitBlock(AllocElseBB); 9012 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ( 9013 LeftToFrom, 9014 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); 9015 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); 9016 // In case of to, clear OMP_MAP_FROM. 
9017 MapperCGF.EmitBlock(ToBB); 9018 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( 9019 MemberMapType, 9020 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); 9021 MapperCGF.Builder.CreateBr(EndBB); 9022 MapperCGF.EmitBlock(ToElseBB); 9023 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( 9024 LeftToFrom, 9025 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); 9026 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); 9027 // In case of from, clear OMP_MAP_TO. 9028 MapperCGF.EmitBlock(FromBB); 9029 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( 9030 MemberMapType, 9031 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); 9032 // In case of tofrom, do nothing. 9033 MapperCGF.EmitBlock(EndBB); 9034 llvm::PHINode *CurMapType = 9035 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype"); 9036 CurMapType->addIncoming(AllocMapType, AllocBB); 9037 CurMapType->addIncoming(ToMapType, ToBB); 9038 CurMapType->addIncoming(FromMapType, FromBB); 9039 CurMapType->addIncoming(MemberMapType, ToElseBB); 9040 9041 // TODO: call the corresponding mapper function if a user-defined mapper is 9042 // associated with this map clause. 9043 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9044 // data structure. 9045 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg, 9046 CurSizeArg, CurMapType}; 9047 MapperCGF.EmitRuntimeCall( 9048 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), 9049 OffloadingArgs); 9050 } 9051 9052 // Update the pointer to point to the next element that needs to be mapped, 9053 // and check whether we have mapped all elements. 
9054 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( 9055 PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); 9056 PtrPHI->addIncoming(PtrNext, BodyBB); 9057 llvm::Value *IsDone = 9058 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); 9059 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit"); 9060 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB); 9061 9062 MapperCGF.EmitBlock(ExitBB); 9063 // Emit array deletion if this is an array section and \p MapType indicates 9064 // that deletion is required. 9065 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, 9066 ElementSize, DoneBB, /*IsInit=*/false); 9067 9068 // Emit the function exit block. 9069 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); 9070 MapperCGF.FinishFunction(); 9071 UDMMap.try_emplace(D, Fn); 9072 if (CGF) { 9073 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn); 9074 Decls.second.push_back(D); 9075 } 9076 } 9077 9078 /// Emit the array initialization or deletion portion for user-defined mapper 9079 /// code generation. First, it evaluates whether an array section is mapped and 9080 /// whether the \a MapType instructs to delete this section. If \a IsInit is 9081 /// true, and \a MapType indicates to not delete this array, array 9082 /// initialization code is generated. If \a IsInit is false, and \a MapType 9083 /// indicates to not this array, array deletion code is generated. 9084 void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( 9085 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, 9086 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, 9087 CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { 9088 StringRef Prefix = IsInit ? ".init" : ".del"; 9089 9090 // Evaluate if this is an array section. 
9091 llvm::BasicBlock *IsDeleteBB = 9092 MapperCGF.createBasicBlock("omp.array" + Prefix + ".evaldelete"); 9093 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.array" + Prefix); 9094 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( 9095 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); 9096 MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); 9097 9098 // Evaluate if we are going to delete this section. 9099 MapperCGF.EmitBlock(IsDeleteBB); 9100 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( 9101 MapType, 9102 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); 9103 llvm::Value *DeleteCond; 9104 if (IsInit) { 9105 DeleteCond = MapperCGF.Builder.CreateIsNull( 9106 DeleteBit, "omp.array" + Prefix + ".delete"); 9107 } else { 9108 DeleteCond = MapperCGF.Builder.CreateIsNotNull( 9109 DeleteBit, "omp.array" + Prefix + ".delete"); 9110 } 9111 MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); 9112 9113 MapperCGF.EmitBlock(BodyBB); 9114 // Get the array size by multiplying element size and element number (i.e., \p 9115 // Size). 9116 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul( 9117 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); 9118 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves 9119 // memory allocation/deletion purpose only. 9120 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( 9121 MapType, 9122 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | 9123 MappableExprsHandler::OMP_MAP_FROM))); 9124 // Call the runtime API __tgt_push_mapper_component to fill up the runtime 9125 // data structure. 
9126 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg}; 9127 MapperCGF.EmitRuntimeCall( 9128 createRuntimeFunction(OMPRTL__tgt_push_mapper_component), OffloadingArgs); 9129 } 9130 9131 void CGOpenMPRuntime::emitTargetNumIterationsCall( 9132 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9133 llvm::Value *DeviceID, 9134 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9135 const OMPLoopDirective &D)> 9136 SizeEmitter) { 9137 OpenMPDirectiveKind Kind = D.getDirectiveKind(); 9138 const OMPExecutableDirective *TD = &D; 9139 // Get nested teams distribute kind directive, if any. 9140 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) 9141 TD = getNestedDistributeDirective(CGM.getContext(), D); 9142 if (!TD) 9143 return; 9144 const auto *LD = cast<OMPLoopDirective>(TD); 9145 auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF, 9146 PrePostActionTy &) { 9147 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { 9148 llvm::Value *Args[] = {DeviceID, NumIterations}; 9149 CGF.EmitRuntimeCall( 9150 createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args); 9151 } 9152 }; 9153 emitInlinedDirective(CGF, OMPD_unknown, CodeGen); 9154 } 9155 9156 void CGOpenMPRuntime::emitTargetCall( 9157 CodeGenFunction &CGF, const OMPExecutableDirective &D, 9158 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 9159 const Expr *Device, 9160 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 9161 const OMPLoopDirective &D)> 9162 SizeEmitter) { 9163 if (!CGF.HaveInsertPoint()) 9164 return; 9165 9166 assert(OutlinedFn && "Invalid outlined function!"); 9167 9168 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); 9169 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 9170 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); 9171 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, 9172 PrePostActionTy &) { 9173 
CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9174 }; 9175 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen); 9176 9177 CodeGenFunction::OMPTargetDataInfo InputInfo; 9178 llvm::Value *MapTypesArray = nullptr; 9179 // Fill up the pointer arrays and transfer execution to the device. 9180 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, 9181 &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, 9182 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { 9183 // On top of the arrays that were filled up, the target offloading call 9184 // takes as arguments the device id as well as the host pointer. The host 9185 // pointer is used by the runtime library to identify the current target 9186 // region, so it only has to be unique and not necessarily point to 9187 // anything. It could be the pointer to the outlined function that 9188 // implements the target region, but we aren't using that so that the 9189 // compiler doesn't need to keep that, and could therefore inline the host 9190 // function if proven worthwhile during optimization. 9191 9192 // From this point on, we need to have an ID of the target region defined. 9193 assert(OutlinedFnID && "Invalid outlined function ID!"); 9194 9195 // Emit device ID if any. 9196 llvm::Value *DeviceID; 9197 if (Device) { 9198 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), 9199 CGF.Int64Ty, /*isSigned=*/true); 9200 } else { 9201 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); 9202 } 9203 9204 // Emit the number of elements in the offloading arrays. 9205 llvm::Value *PointerNum = 9206 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); 9207 9208 // Return value of the runtime offloading call. 9209 llvm::Value *Return; 9210 9211 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); 9212 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); 9213 9214 // Emit tripcount for the target loop-based directive. 
9215 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); 9216 9217 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); 9218 // The target region is an outlined function launched by the runtime 9219 // via calls __tgt_target() or __tgt_target_teams(). 9220 // 9221 // __tgt_target() launches a target region with one team and one thread, 9222 // executing a serial region. This master thread may in turn launch 9223 // more threads within its team upon encountering a parallel region, 9224 // however, no additional teams can be launched on the device. 9225 // 9226 // __tgt_target_teams() launches a target region with one or more teams, 9227 // each with one or more threads. This call is required for target 9228 // constructs such as: 9229 // 'target teams' 9230 // 'target' / 'teams' 9231 // 'target teams distribute parallel for' 9232 // 'target parallel' 9233 // and so on. 9234 // 9235 // Note that on the host and CPU targets, the runtime implementation of 9236 // these calls simply call the outlined function without forking threads. 9237 // The outlined functions themselves have runtime calls to 9238 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by 9239 // the compiler in emitTeamsCall() and emitParallelCall(). 9240 // 9241 // In contrast, on the NVPTX target, the implementation of 9242 // __tgt_target_teams() launches a GPU kernel with the requested number 9243 // of teams and threads so no additional calls to the runtime are required. 9244 if (NumTeams) { 9245 // If we have NumTeams defined this means that we have an enclosed teams 9246 // region. Therefore we also expect to have NumThreads defined. These two 9247 // values should be defined in the presence of a teams directive, 9248 // regardless of having any clauses associated. If the user is using teams 9249 // but no clauses, these two values will be the default that should be 9250 // passed to the runtime library - a 32-bit integer with the value zero. 
9251 assert(NumThreads && "Thread limit expression should be available along " 9252 "with number of teams."); 9253 llvm::Value *OffloadingArgs[] = {DeviceID, 9254 OutlinedFnID, 9255 PointerNum, 9256 InputInfo.BasePointersArray.getPointer(), 9257 InputInfo.PointersArray.getPointer(), 9258 InputInfo.SizesArray.getPointer(), 9259 MapTypesArray, 9260 NumTeams, 9261 NumThreads}; 9262 Return = CGF.EmitRuntimeCall( 9263 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait 9264 : OMPRTL__tgt_target_teams), 9265 OffloadingArgs); 9266 } else { 9267 llvm::Value *OffloadingArgs[] = {DeviceID, 9268 OutlinedFnID, 9269 PointerNum, 9270 InputInfo.BasePointersArray.getPointer(), 9271 InputInfo.PointersArray.getPointer(), 9272 InputInfo.SizesArray.getPointer(), 9273 MapTypesArray}; 9274 Return = CGF.EmitRuntimeCall( 9275 createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait 9276 : OMPRTL__tgt_target), 9277 OffloadingArgs); 9278 } 9279 9280 // Check the error code and execute the host version if required. 9281 llvm::BasicBlock *OffloadFailedBlock = 9282 CGF.createBasicBlock("omp_offload.failed"); 9283 llvm::BasicBlock *OffloadContBlock = 9284 CGF.createBasicBlock("omp_offload.cont"); 9285 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); 9286 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); 9287 9288 CGF.EmitBlock(OffloadFailedBlock); 9289 if (RequiresOuterTask) { 9290 CapturedVars.clear(); 9291 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9292 } 9293 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9294 CGF.EmitBranch(OffloadContBlock); 9295 9296 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); 9297 }; 9298 9299 // Notify that the host version must be executed. 
9300 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, 9301 RequiresOuterTask](CodeGenFunction &CGF, 9302 PrePostActionTy &) { 9303 if (RequiresOuterTask) { 9304 CapturedVars.clear(); 9305 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); 9306 } 9307 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); 9308 }; 9309 9310 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, 9311 &CapturedVars, RequiresOuterTask, 9312 &CS](CodeGenFunction &CGF, PrePostActionTy &) { 9313 // Fill up the arrays with all the captured variables. 9314 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9315 MappableExprsHandler::MapValuesArrayTy Pointers; 9316 MappableExprsHandler::MapValuesArrayTy Sizes; 9317 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9318 9319 // Get mappable expression information. 9320 MappableExprsHandler MEHandler(D, CGF); 9321 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; 9322 9323 auto RI = CS.getCapturedRecordDecl()->field_begin(); 9324 auto CV = CapturedVars.begin(); 9325 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), 9326 CE = CS.capture_end(); 9327 CI != CE; ++CI, ++RI, ++CV) { 9328 MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers; 9329 MappableExprsHandler::MapValuesArrayTy CurPointers; 9330 MappableExprsHandler::MapValuesArrayTy CurSizes; 9331 MappableExprsHandler::MapFlagsArrayTy CurMapTypes; 9332 MappableExprsHandler::StructRangeInfoTy PartialStruct; 9333 9334 // VLA sizes are passed to the outlined region by copy and do not have map 9335 // information associated. 9336 if (CI->capturesVariableArrayType()) { 9337 CurBasePointers.push_back(*CV); 9338 CurPointers.push_back(*CV); 9339 CurSizes.push_back(CGF.Builder.CreateIntCast( 9340 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); 9341 // Copy to the device as an argument. No need to retrieve it. 
9342 CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL | 9343 MappableExprsHandler::OMP_MAP_TARGET_PARAM | 9344 MappableExprsHandler::OMP_MAP_IMPLICIT); 9345 } else { 9346 // If we have any information in the map clause, we use it, otherwise we 9347 // just do a default mapping. 9348 MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers, 9349 CurSizes, CurMapTypes, PartialStruct); 9350 if (CurBasePointers.empty()) 9351 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers, 9352 CurPointers, CurSizes, CurMapTypes); 9353 // Generate correct mapping for variables captured by reference in 9354 // lambdas. 9355 if (CI->capturesVariable()) 9356 MEHandler.generateInfoForLambdaCaptures( 9357 CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes, 9358 CurMapTypes, LambdaPointers); 9359 } 9360 // We expect to have at least an element of information for this capture. 9361 assert(!CurBasePointers.empty() && 9362 "Non-existing map pointer for capture!"); 9363 assert(CurBasePointers.size() == CurPointers.size() && 9364 CurBasePointers.size() == CurSizes.size() && 9365 CurBasePointers.size() == CurMapTypes.size() && 9366 "Inconsistent map information sizes!"); 9367 9368 // If there is an entry in PartialStruct it means we have a struct with 9369 // individual members mapped. Emit an extra combined entry. 9370 if (PartialStruct.Base.isValid()) 9371 MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes, 9372 CurMapTypes, PartialStruct); 9373 9374 // We need to append the results of this capture to what we already have. 9375 BasePointers.append(CurBasePointers.begin(), CurBasePointers.end()); 9376 Pointers.append(CurPointers.begin(), CurPointers.end()); 9377 Sizes.append(CurSizes.begin(), CurSizes.end()); 9378 MapTypes.append(CurMapTypes.begin(), CurMapTypes.end()); 9379 } 9380 // Adjust MEMBER_OF flags for the lambdas captures. 
9381 MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers, 9382 Pointers, MapTypes); 9383 // Map other list items in the map clause which are not captured variables 9384 // but "declare target link" global variables. 9385 MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes, 9386 MapTypes); 9387 9388 TargetDataInfo Info; 9389 // Fill up the arrays and create the arguments. 9390 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 9391 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 9392 Info.PointersArray, Info.SizesArray, 9393 Info.MapTypesArray, Info); 9394 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 9395 InputInfo.BasePointersArray = 9396 Address(Info.BasePointersArray, CGM.getPointerAlign()); 9397 InputInfo.PointersArray = 9398 Address(Info.PointersArray, CGM.getPointerAlign()); 9399 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); 9400 MapTypesArray = Info.MapTypesArray; 9401 if (RequiresOuterTask) 9402 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 9403 else 9404 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 9405 }; 9406 9407 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( 9408 CodeGenFunction &CGF, PrePostActionTy &) { 9409 if (RequiresOuterTask) { 9410 CodeGenFunction::OMPTargetDataInfo InputInfo; 9411 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); 9412 } else { 9413 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); 9414 } 9415 }; 9416 9417 // If we have a target function ID it means that we need to support 9418 // offloading, otherwise, just execute on the host. We need to execute on host 9419 // regardless of the conditional in the if clause if, e.g., the user do not 9420 // specify target triples. 
9421 if (OutlinedFnID) { 9422 if (IfCond) { 9423 emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen); 9424 } else { 9425 RegionCodeGenTy ThenRCG(TargetThenGen); 9426 ThenRCG(CGF); 9427 } 9428 } else { 9429 RegionCodeGenTy ElseRCG(TargetElseGen); 9430 ElseRCG(CGF); 9431 } 9432 } 9433 9434 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, 9435 StringRef ParentName) { 9436 if (!S) 9437 return; 9438 9439 // Codegen OMP target directives that offload compute to the device. 9440 bool RequiresDeviceCodegen = 9441 isa<OMPExecutableDirective>(S) && 9442 isOpenMPTargetExecutionDirective( 9443 cast<OMPExecutableDirective>(S)->getDirectiveKind()); 9444 9445 if (RequiresDeviceCodegen) { 9446 const auto &E = *cast<OMPExecutableDirective>(S); 9447 unsigned DeviceID; 9448 unsigned FileID; 9449 unsigned Line; 9450 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, 9451 FileID, Line); 9452 9453 // Is this a target region that should not be emitted as an entry point? If 9454 // so just signal we are done with this target region. 
9455 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, 9456 ParentName, Line)) 9457 return; 9458 9459 switch (E.getDirectiveKind()) { 9460 case OMPD_target: 9461 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName, 9462 cast<OMPTargetDirective>(E)); 9463 break; 9464 case OMPD_target_parallel: 9465 CodeGenFunction::EmitOMPTargetParallelDeviceFunction( 9466 CGM, ParentName, cast<OMPTargetParallelDirective>(E)); 9467 break; 9468 case OMPD_target_teams: 9469 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( 9470 CGM, ParentName, cast<OMPTargetTeamsDirective>(E)); 9471 break; 9472 case OMPD_target_teams_distribute: 9473 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( 9474 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E)); 9475 break; 9476 case OMPD_target_teams_distribute_simd: 9477 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( 9478 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E)); 9479 break; 9480 case OMPD_target_parallel_for: 9481 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( 9482 CGM, ParentName, cast<OMPTargetParallelForDirective>(E)); 9483 break; 9484 case OMPD_target_parallel_for_simd: 9485 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( 9486 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E)); 9487 break; 9488 case OMPD_target_simd: 9489 CodeGenFunction::EmitOMPTargetSimdDeviceFunction( 9490 CGM, ParentName, cast<OMPTargetSimdDirective>(E)); 9491 break; 9492 case OMPD_target_teams_distribute_parallel_for: 9493 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( 9494 CGM, ParentName, 9495 cast<OMPTargetTeamsDistributeParallelForDirective>(E)); 9496 break; 9497 case OMPD_target_teams_distribute_parallel_for_simd: 9498 CodeGenFunction:: 9499 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( 9500 CGM, ParentName, 9501 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); 9502 break; 9503 case OMPD_parallel: 
9504 case OMPD_for: 9505 case OMPD_parallel_for: 9506 case OMPD_parallel_sections: 9507 case OMPD_for_simd: 9508 case OMPD_parallel_for_simd: 9509 case OMPD_cancel: 9510 case OMPD_cancellation_point: 9511 case OMPD_ordered: 9512 case OMPD_threadprivate: 9513 case OMPD_allocate: 9514 case OMPD_task: 9515 case OMPD_simd: 9516 case OMPD_sections: 9517 case OMPD_section: 9518 case OMPD_single: 9519 case OMPD_master: 9520 case OMPD_critical: 9521 case OMPD_taskyield: 9522 case OMPD_barrier: 9523 case OMPD_taskwait: 9524 case OMPD_taskgroup: 9525 case OMPD_atomic: 9526 case OMPD_flush: 9527 case OMPD_teams: 9528 case OMPD_target_data: 9529 case OMPD_target_exit_data: 9530 case OMPD_target_enter_data: 9531 case OMPD_distribute: 9532 case OMPD_distribute_simd: 9533 case OMPD_distribute_parallel_for: 9534 case OMPD_distribute_parallel_for_simd: 9535 case OMPD_teams_distribute: 9536 case OMPD_teams_distribute_simd: 9537 case OMPD_teams_distribute_parallel_for: 9538 case OMPD_teams_distribute_parallel_for_simd: 9539 case OMPD_target_update: 9540 case OMPD_declare_simd: 9541 case OMPD_declare_variant: 9542 case OMPD_declare_target: 9543 case OMPD_end_declare_target: 9544 case OMPD_declare_reduction: 9545 case OMPD_declare_mapper: 9546 case OMPD_taskloop: 9547 case OMPD_taskloop_simd: 9548 case OMPD_master_taskloop: 9549 case OMPD_parallel_master_taskloop: 9550 case OMPD_requires: 9551 case OMPD_unknown: 9552 llvm_unreachable("Unknown target directive for OpenMP device codegen."); 9553 } 9554 return; 9555 } 9556 9557 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) { 9558 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt()) 9559 return; 9560 9561 scanForTargetRegionsFunctions( 9562 E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName); 9563 return; 9564 } 9565 9566 // If this is a lambda function, look into its body. 9567 if (const auto *L = dyn_cast<LambdaExpr>(S)) 9568 S = L->getBody(); 9569 9570 // Keep looking for target regions recursively. 
9571 for (const Stmt *II : S->children()) 9572 scanForTargetRegionsFunctions(II, ParentName); 9573 } 9574 9575 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { 9576 // If emitting code for the host, we do not process FD here. Instead we do 9577 // the normal code generation. 9578 if (!CGM.getLangOpts().OpenMPIsDevice) { 9579 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { 9580 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9581 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9582 // Do not emit device_type(nohost) functions for the host. 9583 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) 9584 return true; 9585 } 9586 return false; 9587 } 9588 9589 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl()); 9590 StringRef Name = CGM.getMangledName(GD); 9591 // Try to detect target regions in the function. 9592 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { 9593 scanForTargetRegionsFunctions(FD->getBody(), Name); 9594 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = 9595 OMPDeclareTargetDeclAttr::getDeviceType(FD); 9596 // Do not emit device_type(nohost) functions for the host. 9597 if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) 9598 return true; 9599 } 9600 9601 // Do not to emit function if it is not marked as declare target. 9602 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) && 9603 AlreadyEmittedTargetFunctions.count(Name) == 0; 9604 } 9605 9606 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 9607 if (!CGM.getLangOpts().OpenMPIsDevice) 9608 return false; 9609 9610 // Check if there are Ctors/Dtors in this declaration and look for target 9611 // regions in it. We use the complete variant to produce the kernel name 9612 // mangling. 
9613 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType(); 9614 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) { 9615 for (const CXXConstructorDecl *Ctor : RD->ctors()) { 9616 StringRef ParentName = 9617 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete)); 9618 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName); 9619 } 9620 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) { 9621 StringRef ParentName = 9622 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete)); 9623 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName); 9624 } 9625 } 9626 9627 // Do not to emit variable if it is not marked as declare target. 9628 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9629 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( 9630 cast<VarDecl>(GD.getDecl())); 9631 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || 9632 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9633 HasRequiresUnifiedSharedMemory)) { 9634 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); 9635 return true; 9636 } 9637 return false; 9638 } 9639 9640 llvm::Constant * 9641 CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, 9642 const VarDecl *VD) { 9643 assert(VD->getType().isConstant(CGM.getContext()) && 9644 "Expected constant variable."); 9645 StringRef VarName; 9646 llvm::Constant *Addr; 9647 llvm::GlobalValue::LinkageTypes Linkage; 9648 QualType Ty = VD->getType(); 9649 SmallString<128> Buffer; 9650 { 9651 unsigned DeviceID; 9652 unsigned FileID; 9653 unsigned Line; 9654 getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, 9655 FileID, Line); 9656 llvm::raw_svector_ostream OS(Buffer); 9657 OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID) 9658 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; 9659 VarName = OS.str(); 9660 } 9661 Linkage = llvm::GlobalValue::InternalLinkage; 9662 Addr = 9663 
getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, 9664 getDefaultFirstprivateAddressSpace()); 9665 cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); 9666 CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); 9667 CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); 9668 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9669 VarName, Addr, VarSize, 9670 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); 9671 return Addr; 9672 } 9673 9674 void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, 9675 llvm::Constant *Addr) { 9676 if (CGM.getLangOpts().OMPTargetTriples.empty() && 9677 !CGM.getLangOpts().OpenMPIsDevice) 9678 return; 9679 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9680 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9681 if (!Res) { 9682 if (CGM.getLangOpts().OpenMPIsDevice) { 9683 // Register non-target variables being emitted in device code (debug info 9684 // may cause this). 9685 StringRef VarName = CGM.getMangledName(VD); 9686 EmittedNonTargetVariables.try_emplace(VarName, Addr); 9687 } 9688 return; 9689 } 9690 // Register declare target variables. 9691 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; 9692 StringRef VarName; 9693 CharUnits VarSize; 9694 llvm::GlobalValue::LinkageTypes Linkage; 9695 9696 if (*Res == OMPDeclareTargetDeclAttr::MT_To && 9697 !HasRequiresUnifiedSharedMemory) { 9698 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9699 VarName = CGM.getMangledName(VD); 9700 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { 9701 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); 9702 assert(!VarSize.isZero() && "Expected non-zero size of the variable"); 9703 } else { 9704 VarSize = CharUnits::Zero(); 9705 } 9706 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); 9707 // Temp solution to prevent optimizations of the internal variables. 
9708 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { 9709 std::string RefName = getName({VarName, "ref"}); 9710 if (!CGM.GetGlobalValue(RefName)) { 9711 llvm::Constant *AddrRef = 9712 getOrCreateInternalVariable(Addr->getType(), RefName); 9713 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); 9714 GVAddrRef->setConstant(/*Val=*/true); 9715 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); 9716 GVAddrRef->setInitializer(Addr); 9717 CGM.addCompilerUsedGlobal(GVAddrRef); 9718 } 9719 } 9720 } else { 9721 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || 9722 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9723 HasRequiresUnifiedSharedMemory)) && 9724 "Declare target attribute must link or to with unified memory."); 9725 if (*Res == OMPDeclareTargetDeclAttr::MT_Link) 9726 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; 9727 else 9728 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; 9729 9730 if (CGM.getLangOpts().OpenMPIsDevice) { 9731 VarName = Addr->getName(); 9732 Addr = nullptr; 9733 } else { 9734 VarName = getAddrOfDeclareTargetVar(VD).getName(); 9735 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); 9736 } 9737 VarSize = CGM.getPointerSize(); 9738 Linkage = llvm::GlobalValue::WeakAnyLinkage; 9739 } 9740 9741 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( 9742 VarName, Addr, VarSize, Flags, Linkage); 9743 } 9744 9745 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { 9746 if (isa<FunctionDecl>(GD.getDecl()) || 9747 isa<OMPDeclareReductionDecl>(GD.getDecl())) 9748 return emitTargetFunctions(GD); 9749 9750 return emitTargetGlobalVariable(GD); 9751 } 9752 9753 void CGOpenMPRuntime::emitDeferredTargetDecls() const { 9754 for (const VarDecl *VD : DeferredGlobalVariables) { 9755 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = 9756 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); 9757 if (!Res) 9758 continue; 9759 if (*Res == 
OMPDeclareTargetDeclAttr::MT_To && 9760 !HasRequiresUnifiedSharedMemory) { 9761 CGM.EmitGlobal(VD); 9762 } else { 9763 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || 9764 (*Res == OMPDeclareTargetDeclAttr::MT_To && 9765 HasRequiresUnifiedSharedMemory)) && 9766 "Expected link clause or to clause with unified memory."); 9767 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); 9768 } 9769 } 9770 } 9771 9772 void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( 9773 CodeGenFunction &CGF, const OMPExecutableDirective &D) const { 9774 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && 9775 " Expected target-based directive."); 9776 } 9777 9778 void CGOpenMPRuntime::checkArchForUnifiedAddressing( 9779 const OMPRequiresDecl *D) { 9780 for (const OMPClause *Clause : D->clauselists()) { 9781 if (Clause->getClauseKind() == OMPC_unified_shared_memory) { 9782 HasRequiresUnifiedSharedMemory = true; 9783 break; 9784 } 9785 } 9786 } 9787 9788 bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, 9789 LangAS &AS) { 9790 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) 9791 return false; 9792 const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); 9793 switch(A->getAllocatorType()) { 9794 case OMPAllocateDeclAttr::OMPDefaultMemAlloc: 9795 // Not supported, fallback to the default mem space. 
9796 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: 9797 case OMPAllocateDeclAttr::OMPCGroupMemAlloc: 9798 case OMPAllocateDeclAttr::OMPHighBWMemAlloc: 9799 case OMPAllocateDeclAttr::OMPLowLatMemAlloc: 9800 case OMPAllocateDeclAttr::OMPThreadMemAlloc: 9801 case OMPAllocateDeclAttr::OMPConstMemAlloc: 9802 case OMPAllocateDeclAttr::OMPPTeamMemAlloc: 9803 AS = LangAS::Default; 9804 return true; 9805 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: 9806 llvm_unreachable("Expected predefined allocator for the variables with the " 9807 "static storage."); 9808 } 9809 return false; 9810 } 9811 9812 bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { 9813 return HasRequiresUnifiedSharedMemory; 9814 } 9815 9816 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( 9817 CodeGenModule &CGM) 9818 : CGM(CGM) { 9819 if (CGM.getLangOpts().OpenMPIsDevice) { 9820 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; 9821 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; 9822 } 9823 } 9824 9825 CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { 9826 if (CGM.getLangOpts().OpenMPIsDevice) 9827 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; 9828 } 9829 9830 bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { 9831 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) 9832 return true; 9833 9834 StringRef Name = CGM.getMangledName(GD); 9835 const auto *D = cast<FunctionDecl>(GD.getDecl()); 9836 // Do not to emit function if it is marked as declare target as it was already 9837 // emitted. 
9838 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) { 9839 if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) { 9840 if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name))) 9841 return !F->isDeclaration(); 9842 return false; 9843 } 9844 return true; 9845 } 9846 9847 return !AlreadyEmittedTargetFunctions.insert(Name).second; 9848 } 9849 9850 llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { 9851 // If we don't have entries or if we are emitting code for the device, we 9852 // don't need to do anything. 9853 if (CGM.getLangOpts().OMPTargetTriples.empty() || 9854 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || 9855 (OffloadEntriesInfoManager.empty() && 9856 !HasEmittedDeclareTargetRegion && 9857 !HasEmittedTargetRegion)) 9858 return nullptr; 9859 9860 // Create and register the function that handles the requires directives. 9861 ASTContext &C = CGM.getContext(); 9862 9863 llvm::Function *RequiresRegFn; 9864 { 9865 CodeGenFunction CGF(CGM); 9866 const auto &FI = CGM.getTypes().arrangeNullaryFunction(); 9867 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); 9868 std::string ReqName = getName({"omp_offloading", "requires_reg"}); 9869 RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI); 9870 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); 9871 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; 9872 // TODO: check for other requires clauses. 9873 // The requires directive takes effect only when a target region is 9874 // present in the compilation unit. Otherwise it is ignored and not 9875 // passed to the runtime. This avoids the runtime from throwing an error 9876 // for mismatching requires clauses across compilation units that don't 9877 // contain at least 1 target region. 
9878 assert((HasEmittedTargetRegion || 9879 HasEmittedDeclareTargetRegion || 9880 !OffloadEntriesInfoManager.empty()) && 9881 "Target or declare target region expected."); 9882 if (HasRequiresUnifiedSharedMemory) 9883 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; 9884 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires), 9885 llvm::ConstantInt::get(CGM.Int64Ty, Flags)); 9886 CGF.FinishFunction(); 9887 } 9888 return RequiresRegFn; 9889 } 9890 9891 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, 9892 const OMPExecutableDirective &D, 9893 SourceLocation Loc, 9894 llvm::Function *OutlinedFn, 9895 ArrayRef<llvm::Value *> CapturedVars) { 9896 if (!CGF.HaveInsertPoint()) 9897 return; 9898 9899 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9900 CodeGenFunction::RunCleanupsScope Scope(CGF); 9901 9902 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn); 9903 llvm::Value *Args[] = { 9904 RTLoc, 9905 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars 9906 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; 9907 llvm::SmallVector<llvm::Value *, 16> RealArgs; 9908 RealArgs.append(std::begin(Args), std::end(Args)); 9909 RealArgs.append(CapturedVars.begin(), CapturedVars.end()); 9910 9911 llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams); 9912 CGF.EmitRuntimeCall(RTLFn, RealArgs); 9913 } 9914 9915 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 9916 const Expr *NumTeams, 9917 const Expr *ThreadLimit, 9918 SourceLocation Loc) { 9919 if (!CGF.HaveInsertPoint()) 9920 return; 9921 9922 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); 9923 9924 llvm::Value *NumTeamsVal = 9925 NumTeams 9926 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), 9927 CGF.CGM.Int32Ty, /* isSigned = */ true) 9928 : CGF.Builder.getInt32(0); 9929 9930 llvm::Value *ThreadLimitVal = 9931 ThreadLimit 9932 ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), 9933 CGF.CGM.Int32Ty, /* isSigned = */ true) 9934 : CGF.Builder.getInt32(0); 9935 9936 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) 9937 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, 9938 ThreadLimitVal}; 9939 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), 9940 PushNumTeamsArgs); 9941 } 9942 9943 void CGOpenMPRuntime::emitTargetDataCalls( 9944 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 9945 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 9946 if (!CGF.HaveInsertPoint()) 9947 return; 9948 9949 // Action used to replace the default codegen action and turn privatization 9950 // off. 9951 PrePostActionTy NoPrivAction; 9952 9953 // Generate the code for the opening of the data environment. Capture all the 9954 // arguments of the runtime call by reference because they are used in the 9955 // closing of the region. 9956 auto &&BeginThenGen = [this, &D, Device, &Info, 9957 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { 9958 // Fill up the arrays with all the mapped variables. 9959 MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 9960 MappableExprsHandler::MapValuesArrayTy Pointers; 9961 MappableExprsHandler::MapValuesArrayTy Sizes; 9962 MappableExprsHandler::MapFlagsArrayTy MapTypes; 9963 9964 // Get map clause information. 9965 MappableExprsHandler MCHandler(D, CGF); 9966 MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes); 9967 9968 // Fill up the arrays and create the arguments. 
// Materialize the offloading arrays described by the map information.
    emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_begin),
                        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region. The argument list
  // mirrors the one passed to __tgt_target_data_begin, rebuilt from Info.
  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg, Info);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    llvm::Value *OffloadingArgs[] = {
        DeviceID,         PointerNum,    BasePointersArrayArg,
        PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target_data_end),
                        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  // Open the data environment, conditionally when an if clause is present.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  // Close the data environment under the same condition it was opened with.
  if (IfCond) {
    emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}

/// Emit the runtime call for a standalone 'target enter data',
/// 'target exit data' or 'target update' directive.
///
/// \param CGF Function being emitted; nothing is emitted without an insert
/// point.
/// \param D The standalone directive (its kind is asserted below).
/// \param IfCond Optional if-clause condition guarding the call.
/// \param Device Optional device expression; OMP_DEVICEID_UNDEF is passed
/// when it is null.
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and MapTypesArray are captured by reference by ThenGen and read
  // when it runs; TargetThenGen fills them before invoking ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo,
                    &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    llvm::Value *OffloadingArgs[] = {DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray};

    // Select the right runtime function call for each expected standalone
    // directive. A nowait clause selects the *_nowait entry point.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    OpenMPRTLFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
                        : OMPRTL__tgt_target_data_begin;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
                        : OMPRTL__tgt_target_data_end;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
                        : OMPRTL__tgt_target_data_update;
      break;
    // Every other directive kind is listed explicitly and is unreachable here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_parallel_master_taskloop:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_unknown:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
    MappableExprsHandler::MapValuesArrayTy Pointers;
    MappableExprsHandler::MapValuesArrayTy Sizes;
    MappableExprsHandler::MapFlagsArrayTy MapTypes;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
10199 emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info); 10200 emitOffloadingArraysArgument(CGF, Info.BasePointersArray, 10201 Info.PointersArray, Info.SizesArray, 10202 Info.MapTypesArray, Info); 10203 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 10204 InputInfo.BasePointersArray = 10205 Address(Info.BasePointersArray, CGM.getPointerAlign()); 10206 InputInfo.PointersArray = 10207 Address(Info.PointersArray, CGM.getPointerAlign()); 10208 InputInfo.SizesArray = 10209 Address(Info.SizesArray, CGM.getPointerAlign()); 10210 MapTypesArray = Info.MapTypesArray; 10211 if (D.hasClausesOfKind<OMPDependClause>()) 10212 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); 10213 else 10214 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); 10215 }; 10216 10217 if (IfCond) { 10218 emitOMPIfClause(CGF, IfCond, TargetThenGen, 10219 [](CodeGenFunction &CGF, PrePostActionTy &) {}); 10220 } else { 10221 RegionCodeGenTy ThenRCG(TargetThenGen); 10222 ThenRCG(CGF); 10223 } 10224 } 10225 10226 namespace { 10227 /// Kind of parameter in a function with 'declare simd' directive. 10228 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; 10229 /// Attribute set of the parameter. 10230 struct ParamAttrTy { 10231 ParamKindTy Kind = Vector; 10232 llvm::APSInt StrideOrArg; 10233 llvm::APSInt Alignment; 10234 }; 10235 } // namespace 10236 10237 static unsigned evaluateCDTSize(const FunctionDecl *FD, 10238 ArrayRef<ParamAttrTy> ParamAttrs) { 10239 // Every vector variant of a SIMD-enabled function has a vector length (VLEN). 10240 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument 10241 // of that clause. The VLEN value must be power of 2. 10242 // In other case the notion of the function`s "characteristic data type" (CDT) 10243 // is used to compute the vector length. 10244 // CDT is defined in the following order: 10245 // a) For non-void function, the CDT is the return type. 
10246 // b) If the function has any non-uniform, non-linear parameters, then the 10247 // CDT is the type of the first such parameter. 10248 // c) If the CDT determined by a) or b) above is struct, union, or class 10249 // type which is pass-by-value (except for the type that maps to the 10250 // built-in complex data type), the characteristic data type is int. 10251 // d) If none of the above three cases is applicable, the CDT is int. 10252 // The VLEN is then determined based on the CDT and the size of vector 10253 // register of that ISA for which current vector version is generated. The 10254 // VLEN is computed using the formula below: 10255 // VLEN = sizeof(vector_register) / sizeof(CDT), 10256 // where vector register size specified in section 3.2.1 Registers and the 10257 // Stack Frame of original AMD64 ABI document. 10258 QualType RetType = FD->getReturnType(); 10259 if (RetType.isNull()) 10260 return 0; 10261 ASTContext &C = FD->getASTContext(); 10262 QualType CDT; 10263 if (!RetType.isNull() && !RetType->isVoidType()) { 10264 CDT = RetType; 10265 } else { 10266 unsigned Offset = 0; 10267 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) { 10268 if (ParamAttrs[Offset].Kind == Vector) 10269 CDT = C.getPointerType(C.getRecordType(MD->getParent())); 10270 ++Offset; 10271 } 10272 if (CDT.isNull()) { 10273 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10274 if (ParamAttrs[I + Offset].Kind == Vector) { 10275 CDT = FD->getParamDecl(I)->getType(); 10276 break; 10277 } 10278 } 10279 } 10280 } 10281 if (CDT.isNull()) 10282 CDT = C.IntTy; 10283 CDT = CDT->getCanonicalTypeUnqualified(); 10284 if (CDT->isRecordType() || CDT->isUnionType()) 10285 CDT = C.IntTy; 10286 return C.getTypeSize(CDT); 10287 } 10288 10289 static void 10290 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, 10291 const llvm::APSInt &VLENVal, 10292 ArrayRef<ParamAttrTy> ParamAttrs, 10293 OMPDeclareSimdDeclAttr::BranchStateTy State) { 10294 struct ISADataTy { 
10295 char ISA; 10296 unsigned VecRegSize; 10297 }; 10298 ISADataTy ISAData[] = { 10299 { 10300 'b', 128 10301 }, // SSE 10302 { 10303 'c', 256 10304 }, // AVX 10305 { 10306 'd', 256 10307 }, // AVX2 10308 { 10309 'e', 512 10310 }, // AVX512 10311 }; 10312 llvm::SmallVector<char, 2> Masked; 10313 switch (State) { 10314 case OMPDeclareSimdDeclAttr::BS_Undefined: 10315 Masked.push_back('N'); 10316 Masked.push_back('M'); 10317 break; 10318 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10319 Masked.push_back('N'); 10320 break; 10321 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10322 Masked.push_back('M'); 10323 break; 10324 } 10325 for (char Mask : Masked) { 10326 for (const ISADataTy &Data : ISAData) { 10327 SmallString<256> Buffer; 10328 llvm::raw_svector_ostream Out(Buffer); 10329 Out << "_ZGV" << Data.ISA << Mask; 10330 if (!VLENVal) { 10331 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs); 10332 assert(NumElts && "Non-zero simdlen/cdtsize expected"); 10333 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts); 10334 } else { 10335 Out << VLENVal; 10336 } 10337 for (const ParamAttrTy &ParamAttr : ParamAttrs) { 10338 switch (ParamAttr.Kind){ 10339 case LinearWithVarStride: 10340 Out << 's' << ParamAttr.StrideOrArg; 10341 break; 10342 case Linear: 10343 Out << 'l'; 10344 if (!!ParamAttr.StrideOrArg) 10345 Out << ParamAttr.StrideOrArg; 10346 break; 10347 case Uniform: 10348 Out << 'u'; 10349 break; 10350 case Vector: 10351 Out << 'v'; 10352 break; 10353 } 10354 if (!!ParamAttr.Alignment) 10355 Out << 'a' << ParamAttr.Alignment; 10356 } 10357 Out << '_' << Fn->getName(); 10358 Fn->addFnAttr(Out.str()); 10359 } 10360 } 10361 } 10362 10363 // This are the Functions that are needed to mangle the name of the 10364 // vector functions generated by the compiler, according to the rules 10365 // defined in the "Vector Function ABI specifications for AArch64", 10366 // available at 10367 // 
https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. 10368 10369 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. 10370 /// 10371 /// TODO: Need to implement the behavior for reference marked with a 10372 /// var or no linear modifiers (1.b in the section). For this, we 10373 /// need to extend ParamKindTy to support the linear modifiers. 10374 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { 10375 QT = QT.getCanonicalType(); 10376 10377 if (QT->isVoidType()) 10378 return false; 10379 10380 if (Kind == ParamKindTy::Uniform) 10381 return false; 10382 10383 if (Kind == ParamKindTy::Linear) 10384 return false; 10385 10386 // TODO: Handle linear references with modifiers 10387 10388 if (Kind == ParamKindTy::LinearWithVarStride) 10389 return false; 10390 10391 return true; 10392 } 10393 10394 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI. 10395 static bool getAArch64PBV(QualType QT, ASTContext &C) { 10396 QT = QT.getCanonicalType(); 10397 unsigned Size = C.getTypeSize(QT); 10398 10399 // Only scalars and complex within 16 bytes wide set PVB to true. 10400 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128) 10401 return false; 10402 10403 if (QT->isFloatingType()) 10404 return true; 10405 10406 if (QT->isIntegerType()) 10407 return true; 10408 10409 if (QT->isPointerType()) 10410 return true; 10411 10412 // TODO: Add support for complex types (section 3.1.2, item 2). 10413 10414 return false; 10415 } 10416 10417 /// Computes the lane size (LS) of a return type or of an input parameter, 10418 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI. 10419 /// TODO: Add support for references, section 3.2.1, item 1. 
10420 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) { 10421 if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) { 10422 QualType PTy = QT.getCanonicalType()->getPointeeType(); 10423 if (getAArch64PBV(PTy, C)) 10424 return C.getTypeSize(PTy); 10425 } 10426 if (getAArch64PBV(QT, C)) 10427 return C.getTypeSize(QT); 10428 10429 return C.getTypeSize(C.getUIntPtrType()); 10430 } 10431 10432 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the 10433 // signature of the scalar function, as defined in 3.2.2 of the 10434 // AAVFABI. 10435 static std::tuple<unsigned, unsigned, bool> 10436 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { 10437 QualType RetType = FD->getReturnType().getCanonicalType(); 10438 10439 ASTContext &C = FD->getASTContext(); 10440 10441 bool OutputBecomesInput = false; 10442 10443 llvm::SmallVector<unsigned, 8> Sizes; 10444 if (!RetType->isVoidType()) { 10445 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C)); 10446 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {})) 10447 OutputBecomesInput = true; 10448 } 10449 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) { 10450 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType(); 10451 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C)); 10452 } 10453 10454 assert(!Sizes.empty() && "Unable to determine NDS and WDS."); 10455 // The LS of a function parameter / return value can only be a power 10456 // of 2, starting from 8 bits, up to 128. 
10457 assert(std::all_of(Sizes.begin(), Sizes.end(), 10458 [](unsigned Size) { 10459 return Size == 8 || Size == 16 || Size == 32 || 10460 Size == 64 || Size == 128; 10461 }) && 10462 "Invalid size"); 10463 10464 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), 10465 *std::max_element(std::begin(Sizes), std::end(Sizes)), 10466 OutputBecomesInput); 10467 } 10468 10469 /// Mangle the parameter part of the vector function name according to 10470 /// their OpenMP classification. The mangling function is defined in 10471 /// section 3.5 of the AAVFABI. 10472 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { 10473 SmallString<256> Buffer; 10474 llvm::raw_svector_ostream Out(Buffer); 10475 for (const auto &ParamAttr : ParamAttrs) { 10476 switch (ParamAttr.Kind) { 10477 case LinearWithVarStride: 10478 Out << "ls" << ParamAttr.StrideOrArg; 10479 break; 10480 case Linear: 10481 Out << 'l'; 10482 // Don't print the step value if it is not present or if it is 10483 // equal to 1. 10484 if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1) 10485 Out << ParamAttr.StrideOrArg; 10486 break; 10487 case Uniform: 10488 Out << 'u'; 10489 break; 10490 case Vector: 10491 Out << 'v'; 10492 break; 10493 } 10494 10495 if (!!ParamAttr.Alignment) 10496 Out << 'a' << ParamAttr.Alignment; 10497 } 10498 10499 return Out.str(); 10500 } 10501 10502 // Function used to add the attribute. The parameter `VLEN` is 10503 // templated to allow the use of "x" when targeting scalable functions 10504 // for SVE. 
10505 template <typename T> 10506 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, 10507 char ISA, StringRef ParSeq, 10508 StringRef MangledName, bool OutputBecomesInput, 10509 llvm::Function *Fn) { 10510 SmallString<256> Buffer; 10511 llvm::raw_svector_ostream Out(Buffer); 10512 Out << Prefix << ISA << LMask << VLEN; 10513 if (OutputBecomesInput) 10514 Out << "v"; 10515 Out << ParSeq << "_" << MangledName; 10516 Fn->addFnAttr(Out.str()); 10517 } 10518 10519 // Helper function to generate the Advanced SIMD names depending on 10520 // the value of the NDS when simdlen is not present. 10521 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, 10522 StringRef Prefix, char ISA, 10523 StringRef ParSeq, StringRef MangledName, 10524 bool OutputBecomesInput, 10525 llvm::Function *Fn) { 10526 switch (NDS) { 10527 case 8: 10528 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10529 OutputBecomesInput, Fn); 10530 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName, 10531 OutputBecomesInput, Fn); 10532 break; 10533 case 16: 10534 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10535 OutputBecomesInput, Fn); 10536 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName, 10537 OutputBecomesInput, Fn); 10538 break; 10539 case 32: 10540 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10541 OutputBecomesInput, Fn); 10542 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName, 10543 OutputBecomesInput, Fn); 10544 break; 10545 case 64: 10546 case 128: 10547 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName, 10548 OutputBecomesInput, Fn); 10549 break; 10550 default: 10551 llvm_unreachable("Scalar type is too wide."); 10552 } 10553 } 10554 10555 /// Emit vector function attributes for AArch64, as defined in the AAVFABI. 
10556 static void emitAArch64DeclareSimdFunction( 10557 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, 10558 ArrayRef<ParamAttrTy> ParamAttrs, 10559 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, 10560 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) { 10561 10562 // Get basic data for building the vector signature. 10563 const auto Data = getNDSWDS(FD, ParamAttrs); 10564 const unsigned NDS = std::get<0>(Data); 10565 const unsigned WDS = std::get<1>(Data); 10566 const bool OutputBecomesInput = std::get<2>(Data); 10567 10568 // Check the values provided via `simdlen` by the user. 10569 // 1. A `simdlen(1)` doesn't produce vector signatures, 10570 if (UserVLEN == 1) { 10571 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10572 DiagnosticsEngine::Warning, 10573 "The clause simdlen(1) has no effect when targeting aarch64."); 10574 CGM.getDiags().Report(SLoc, DiagID); 10575 return; 10576 } 10577 10578 // 2. Section 3.3.1, item 1: user input must be a power of 2 for 10579 // Advanced SIMD output. 10580 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) { 10581 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10582 DiagnosticsEngine::Warning, "The value specified in simdlen must be a " 10583 "power of 2 when targeting Advanced SIMD."); 10584 CGM.getDiags().Report(SLoc, DiagID); 10585 return; 10586 } 10587 10588 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural 10589 // limits. 10590 if (ISA == 's' && UserVLEN != 0) { 10591 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) { 10592 unsigned DiagID = CGM.getDiags().getCustomDiagID( 10593 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit " 10594 "lanes in the architectural constraints " 10595 "for SVE (min is 128-bit, max is " 10596 "2048-bit, by steps of 128-bit)"); 10597 CGM.getDiags().Report(SLoc, DiagID) << WDS; 10598 return; 10599 } 10600 } 10601 10602 // Sort out parameter sequence. 
10603 const std::string ParSeq = mangleVectorParameters(ParamAttrs); 10604 StringRef Prefix = "_ZGV"; 10605 // Generate simdlen from user input (if any). 10606 if (UserVLEN) { 10607 if (ISA == 's') { 10608 // SVE generates only a masked function. 10609 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10610 OutputBecomesInput, Fn); 10611 } else { 10612 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10613 // Advanced SIMD generates one or two functions, depending on 10614 // the `[not]inbranch` clause. 10615 switch (State) { 10616 case OMPDeclareSimdDeclAttr::BS_Undefined: 10617 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10618 OutputBecomesInput, Fn); 10619 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10620 OutputBecomesInput, Fn); 10621 break; 10622 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10623 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName, 10624 OutputBecomesInput, Fn); 10625 break; 10626 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10627 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName, 10628 OutputBecomesInput, Fn); 10629 break; 10630 } 10631 } 10632 } else { 10633 // If no user simdlen is provided, follow the AAVFABI rules for 10634 // generating the vector length. 10635 if (ISA == 's') { 10636 // SVE, section 3.4.1, item 1. 10637 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName, 10638 OutputBecomesInput, Fn); 10639 } else { 10640 assert(ISA == 'n' && "Expected ISA either 's' or 'n'."); 10641 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or 10642 // two vector names depending on the use of the clause 10643 // `[not]inbranch`. 
10644 switch (State) { 10645 case OMPDeclareSimdDeclAttr::BS_Undefined: 10646 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10647 OutputBecomesInput, Fn); 10648 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10649 OutputBecomesInput, Fn); 10650 break; 10651 case OMPDeclareSimdDeclAttr::BS_Notinbranch: 10652 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName, 10653 OutputBecomesInput, Fn); 10654 break; 10655 case OMPDeclareSimdDeclAttr::BS_Inbranch: 10656 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName, 10657 OutputBecomesInput, Fn); 10658 break; 10659 } 10660 } 10661 } 10662 } 10663 10664 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, 10665 llvm::Function *Fn) { 10666 ASTContext &C = CGM.getContext(); 10667 FD = FD->getMostRecentDecl(); 10668 // Map params to their positions in function decl. 10669 llvm::DenseMap<const Decl *, unsigned> ParamPositions; 10670 if (isa<CXXMethodDecl>(FD)) 10671 ParamPositions.try_emplace(FD, 0); 10672 unsigned ParamPos = ParamPositions.size(); 10673 for (const ParmVarDecl *P : FD->parameters()) { 10674 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); 10675 ++ParamPos; 10676 } 10677 while (FD) { 10678 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { 10679 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); 10680 // Mark uniform parameters. 10681 for (const Expr *E : Attr->uniforms()) { 10682 E = E->IgnoreParenImpCasts(); 10683 unsigned Pos; 10684 if (isa<CXXThisExpr>(E)) { 10685 Pos = ParamPositions[FD]; 10686 } else { 10687 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10688 ->getCanonicalDecl(); 10689 Pos = ParamPositions[PVD]; 10690 } 10691 ParamAttrs[Pos].Kind = Uniform; 10692 } 10693 // Get alignment info. 
10694 auto NI = Attr->alignments_begin(); 10695 for (const Expr *E : Attr->aligneds()) { 10696 E = E->IgnoreParenImpCasts(); 10697 unsigned Pos; 10698 QualType ParmTy; 10699 if (isa<CXXThisExpr>(E)) { 10700 Pos = ParamPositions[FD]; 10701 ParmTy = E->getType(); 10702 } else { 10703 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10704 ->getCanonicalDecl(); 10705 Pos = ParamPositions[PVD]; 10706 ParmTy = PVD->getType(); 10707 } 10708 ParamAttrs[Pos].Alignment = 10709 (*NI) 10710 ? (*NI)->EvaluateKnownConstInt(C) 10711 : llvm::APSInt::getUnsigned( 10712 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy)) 10713 .getQuantity()); 10714 ++NI; 10715 } 10716 // Mark linear parameters. 10717 auto SI = Attr->steps_begin(); 10718 auto MI = Attr->modifiers_begin(); 10719 for (const Expr *E : Attr->linears()) { 10720 E = E->IgnoreParenImpCasts(); 10721 unsigned Pos; 10722 if (isa<CXXThisExpr>(E)) { 10723 Pos = ParamPositions[FD]; 10724 } else { 10725 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) 10726 ->getCanonicalDecl(); 10727 Pos = ParamPositions[PVD]; 10728 } 10729 ParamAttrTy &ParamAttr = ParamAttrs[Pos]; 10730 ParamAttr.Kind = Linear; 10731 if (*SI) { 10732 Expr::EvalResult Result; 10733 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { 10734 if (const auto *DRE = 10735 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { 10736 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { 10737 ParamAttr.Kind = LinearWithVarStride; 10738 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( 10739 ParamPositions[StridePVD->getCanonicalDecl()]); 10740 } 10741 } 10742 } else { 10743 ParamAttr.StrideOrArg = Result.Val.getInt(); 10744 } 10745 } 10746 ++SI; 10747 ++MI; 10748 } 10749 llvm::APSInt VLENVal; 10750 SourceLocation ExprLoc; 10751 const Expr *VLENExpr = Attr->getSimdlen(); 10752 if (VLENExpr) { 10753 VLENVal = VLENExpr->EvaluateKnownConstInt(C); 10754 ExprLoc = VLENExpr->getExprLoc(); 10755 } 10756 
OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState(); 10757 if (CGM.getTriple().getArch() == llvm::Triple::x86 || 10758 CGM.getTriple().getArch() == llvm::Triple::x86_64) { 10759 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); 10760 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) { 10761 unsigned VLEN = VLENVal.getExtValue(); 10762 StringRef MangledName = Fn->getName(); 10763 if (CGM.getTarget().hasFeature("sve")) 10764 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10765 MangledName, 's', 128, Fn, ExprLoc); 10766 if (CGM.getTarget().hasFeature("neon")) 10767 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, 10768 MangledName, 'n', 128, Fn, ExprLoc); 10769 } 10770 } 10771 FD = FD->getPreviousDecl(); 10772 } 10773 } 10774 10775 namespace { 10776 /// Cleanup action for doacross support. 10777 class DoacrossCleanupTy final : public EHScopeStack::Cleanup { 10778 public: 10779 static const int DoacrossFinArgs = 2; 10780 10781 private: 10782 llvm::FunctionCallee RTLFn; 10783 llvm::Value *Args[DoacrossFinArgs]; 10784 10785 public: 10786 DoacrossCleanupTy(llvm::FunctionCallee RTLFn, 10787 ArrayRef<llvm::Value *> CallArgs) 10788 : RTLFn(RTLFn) { 10789 assert(CallArgs.size() == DoacrossFinArgs); 10790 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10791 } 10792 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10793 if (!CGF.HaveInsertPoint()) 10794 return; 10795 CGF.EmitRuntimeCall(RTLFn, Args); 10796 } 10797 }; 10798 } // namespace 10799 10800 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, 10801 const OMPLoopDirective &D, 10802 ArrayRef<Expr *> NumIterations) { 10803 if (!CGF.HaveInsertPoint()) 10804 return; 10805 10806 ASTContext &C = CGM.getContext(); 10807 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); 10808 RecordDecl *RD; 10809 if (KmpDimTy.isNull()) { 10810 // Build struct kmp_dim { // loop bounds info casted to 
kmp_int64 10811 // kmp_int64 lo; // lower 10812 // kmp_int64 up; // upper 10813 // kmp_int64 st; // stride 10814 // }; 10815 RD = C.buildImplicitRecord("kmp_dim"); 10816 RD->startDefinition(); 10817 addFieldToRecordDecl(C, RD, Int64Ty); 10818 addFieldToRecordDecl(C, RD, Int64Ty); 10819 addFieldToRecordDecl(C, RD, Int64Ty); 10820 RD->completeDefinition(); 10821 KmpDimTy = C.getRecordType(RD); 10822 } else { 10823 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); 10824 } 10825 llvm::APInt Size(/*numBits=*/32, NumIterations.size()); 10826 QualType ArrayTy = 10827 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); 10828 10829 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); 10830 CGF.EmitNullInitialization(DimsAddr, ArrayTy); 10831 enum { LowerFD = 0, UpperFD, StrideFD }; 10832 // Fill dims with data. 10833 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) { 10834 LValue DimsLVal = CGF.MakeAddrLValue( 10835 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy); 10836 // dims.upper = num_iterations; 10837 LValue UpperLVal = CGF.EmitLValueForField( 10838 DimsLVal, *std::next(RD->field_begin(), UpperFD)); 10839 llvm::Value *NumIterVal = 10840 CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]), 10841 D.getNumIterations()->getType(), Int64Ty, 10842 D.getNumIterations()->getExprLoc()); 10843 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); 10844 // dims.stride = 1; 10845 LValue StrideLVal = CGF.EmitLValueForField( 10846 DimsLVal, *std::next(RD->field_begin(), StrideFD)); 10847 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), 10848 StrideLVal); 10849 } 10850 10851 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, 10852 // kmp_int32 num_dims, struct kmp_dim * dims); 10853 llvm::Value *Args[] = { 10854 emitUpdateLocation(CGF, D.getBeginLoc()), 10855 getThreadID(CGF, D.getBeginLoc()), 10856 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), 10857 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 10858 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), 10859 CGM.VoidPtrTy)}; 10860 10861 llvm::FunctionCallee RTLFn = 10862 createRuntimeFunction(OMPRTL__kmpc_doacross_init); 10863 CGF.EmitRuntimeCall(RTLFn, Args); 10864 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { 10865 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())}; 10866 llvm::FunctionCallee FiniRTLFn = 10867 createRuntimeFunction(OMPRTL__kmpc_doacross_fini); 10868 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 10869 llvm::makeArrayRef(FiniArgs)); 10870 } 10871 10872 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 10873 const OMPDependClause *C) { 10874 QualType Int64Ty = 10875 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); 10876 llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); 10877 QualType ArrayTy = CGM.getContext().getConstantArrayType( 10878 Int64Ty, Size, nullptr, ArrayType::Normal, 0); 10879 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); 10880 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { 10881 const Expr *CounterVal = C->getLoopData(I); 10882 assert(CounterVal); 10883 llvm::Value *CntVal = CGF.EmitScalarConversion( 10884 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty, 10885 CounterVal->getExprLoc()); 10886 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I), 10887 /*Volatile=*/false, Int64Ty); 10888 } 10889 llvm::Value *Args[] = { 10890 emitUpdateLocation(CGF, C->getBeginLoc()), 10891 getThreadID(CGF, C->getBeginLoc()), 10892 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; 10893 llvm::FunctionCallee RTLFn; 10894 if (C->getDependencyKind() == OMPC_DEPEND_source) { 10895 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); 10896 } else { 10897 assert(C->getDependencyKind() == OMPC_DEPEND_sink); 10898 RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); 
10899 } 10900 CGF.EmitRuntimeCall(RTLFn, Args); 10901 } 10902 10903 void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, 10904 llvm::FunctionCallee Callee, 10905 ArrayRef<llvm::Value *> Args) const { 10906 assert(Loc.isValid() && "Outlined function call location must be valid."); 10907 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); 10908 10909 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) { 10910 if (Fn->doesNotThrow()) { 10911 CGF.EmitNounwindRuntimeCall(Fn, Args); 10912 return; 10913 } 10914 } 10915 CGF.EmitRuntimeCall(Callee, Args); 10916 } 10917 10918 void CGOpenMPRuntime::emitOutlinedFunctionCall( 10919 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, 10920 ArrayRef<llvm::Value *> Args) const { 10921 emitCall(CGF, Loc, OutlinedFn, Args); 10922 } 10923 10924 void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) { 10925 if (const auto *FD = dyn_cast<FunctionDecl>(D)) 10926 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD)) 10927 HasEmittedDeclareTargetRegion = true; 10928 } 10929 10930 Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, 10931 const VarDecl *NativeParam, 10932 const VarDecl *TargetParam) const { 10933 return CGF.GetAddrOfLocalVar(NativeParam); 10934 } 10935 10936 namespace { 10937 /// Cleanup action for allocate support. 
10938 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { 10939 public: 10940 static const int CleanupArgs = 3; 10941 10942 private: 10943 llvm::FunctionCallee RTLFn; 10944 llvm::Value *Args[CleanupArgs]; 10945 10946 public: 10947 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, 10948 ArrayRef<llvm::Value *> CallArgs) 10949 : RTLFn(RTLFn) { 10950 assert(CallArgs.size() == CleanupArgs && 10951 "Size of arguments does not match."); 10952 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); 10953 } 10954 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { 10955 if (!CGF.HaveInsertPoint()) 10956 return; 10957 CGF.EmitRuntimeCall(RTLFn, Args); 10958 } 10959 }; 10960 } // namespace 10961 10962 Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, 10963 const VarDecl *VD) { 10964 if (!VD) 10965 return Address::invalid(); 10966 const VarDecl *CVD = VD->getCanonicalDecl(); 10967 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) 10968 return Address::invalid(); 10969 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); 10970 // Use the default allocation. 
10971 if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && 10972 !AA->getAllocator()) 10973 return Address::invalid(); 10974 llvm::Value *Size; 10975 CharUnits Align = CGM.getContext().getDeclAlign(CVD); 10976 if (CVD->getType()->isVariablyModifiedType()) { 10977 Size = CGF.getTypeSize(CVD->getType()); 10978 // Align the size: ((size + align - 1) / align) * align 10979 Size = CGF.Builder.CreateNUWAdd( 10980 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); 10981 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); 10982 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); 10983 } else { 10984 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); 10985 Size = CGM.getSize(Sz.alignTo(Align)); 10986 } 10987 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); 10988 assert(AA->getAllocator() && 10989 "Expected allocator expression for non-default allocator."); 10990 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); 10991 // According to the standard, the original allocator type is a enum (integer). 10992 // Convert to pointer type, if required. 
10993 if (Allocator->getType()->isIntegerTy()) 10994 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); 10995 else if (Allocator->getType()->isPointerTy()) 10996 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, 10997 CGM.VoidPtrTy); 10998 llvm::Value *Args[] = {ThreadID, Size, Allocator}; 10999 11000 llvm::Value *Addr = 11001 CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_alloc), Args, 11002 CVD->getName() + ".void.addr"); 11003 llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr, 11004 Allocator}; 11005 llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free); 11006 11007 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn, 11008 llvm::makeArrayRef(FiniArgs)); 11009 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( 11010 Addr, 11011 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), 11012 CVD->getName() + ".addr"); 11013 return Address(Addr, Align); 11014 } 11015 11016 /// Checks current context and returns true if it matches the context selector. 11017 template <OMPDeclareVariantAttr::CtxSelectorSetType CtxSet, 11018 OMPDeclareVariantAttr::CtxSelectorType Ctx> 11019 static bool checkContext(const OMPDeclareVariantAttr *A) { 11020 assert(CtxSet != OMPDeclareVariantAttr::CtxSetUnknown && 11021 Ctx != OMPDeclareVariantAttr::CtxUnknown && 11022 "Unknown context selector or context selector set."); 11023 return false; 11024 } 11025 11026 /// Checks for implementation={vendor(<vendor>)} context selector. 11027 /// \returns true iff <vendor>="llvm", false otherwise. 
11028 template <> 11029 bool checkContext<OMPDeclareVariantAttr::CtxSetImplementation, 11030 OMPDeclareVariantAttr::CtxVendor>( 11031 const OMPDeclareVariantAttr *A) { 11032 return llvm::all_of(A->implVendors(), 11033 [](StringRef S) { return !S.compare_lower("llvm"); }); 11034 } 11035 11036 static bool greaterCtxScore(ASTContext &Ctx, const Expr *LHS, const Expr *RHS) { 11037 // If both scores are unknown, choose the very first one. 11038 if (!LHS && !RHS) 11039 return true; 11040 // If only one is known, return this one. 11041 if (LHS && !RHS) 11042 return true; 11043 if (!LHS && RHS) 11044 return false; 11045 llvm::APSInt LHSVal = LHS->EvaluateKnownConstInt(Ctx); 11046 llvm::APSInt RHSVal = RHS->EvaluateKnownConstInt(Ctx); 11047 return llvm::APSInt::compareValues(LHSVal, RHSVal) >= 0; 11048 } 11049 11050 namespace { 11051 /// Comparator for the priority queue for context selector. 11052 class OMPDeclareVariantAttrComparer 11053 : public std::greater<const OMPDeclareVariantAttr *> { 11054 private: 11055 ASTContext &Ctx; 11056 11057 public: 11058 OMPDeclareVariantAttrComparer(ASTContext &Ctx) : Ctx(Ctx) {} 11059 bool operator()(const OMPDeclareVariantAttr *LHS, 11060 const OMPDeclareVariantAttr *RHS) const { 11061 const Expr *LHSExpr = nullptr; 11062 const Expr *RHSExpr = nullptr; 11063 if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11064 LHSExpr = LHS->getScore(); 11065 if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11066 RHSExpr = RHS->getScore(); 11067 return greaterCtxScore(Ctx, LHSExpr, RHSExpr); 11068 } 11069 }; 11070 } // anonymous namespace 11071 11072 /// Finds the variant function that matches current context with its context 11073 /// selector. 11074 static const FunctionDecl *getDeclareVariantFunction(ASTContext &Ctx, 11075 const FunctionDecl *FD) { 11076 if (!FD->hasAttrs() || !FD->hasAttr<OMPDeclareVariantAttr>()) 11077 return FD; 11078 // Iterate through all DeclareVariant attributes and check context selectors. 
11079 auto &&Comparer = [&Ctx](const OMPDeclareVariantAttr *LHS, 11080 const OMPDeclareVariantAttr *RHS) { 11081 const Expr *LHSExpr = nullptr; 11082 const Expr *RHSExpr = nullptr; 11083 if (LHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11084 LHSExpr = LHS->getScore(); 11085 if (RHS->getCtxScore() == OMPDeclareVariantAttr::ScoreSpecified) 11086 RHSExpr = RHS->getScore(); 11087 return greaterCtxScore(Ctx, LHSExpr, RHSExpr); 11088 }; 11089 const OMPDeclareVariantAttr *TopMostAttr = nullptr; 11090 for (const auto *A : FD->specific_attrs<OMPDeclareVariantAttr>()) { 11091 const OMPDeclareVariantAttr *SelectedAttr = nullptr; 11092 switch (A->getCtxSelectorSet()) { 11093 case OMPDeclareVariantAttr::CtxSetImplementation: 11094 switch (A->getCtxSelector()) { 11095 case OMPDeclareVariantAttr::CtxVendor: 11096 if (checkContext<OMPDeclareVariantAttr::CtxSetImplementation, 11097 OMPDeclareVariantAttr::CtxVendor>(A)) 11098 SelectedAttr = A; 11099 break; 11100 case OMPDeclareVariantAttr::CtxUnknown: 11101 llvm_unreachable( 11102 "Unknown context selector in implementation selector set."); 11103 } 11104 break; 11105 case OMPDeclareVariantAttr::CtxSetUnknown: 11106 llvm_unreachable("Unknown context selector set."); 11107 } 11108 // If the attribute matches the context, find the attribute with the highest 11109 // score. 11110 if (SelectedAttr && (!TopMostAttr || !Comparer(TopMostAttr, SelectedAttr))) 11111 TopMostAttr = SelectedAttr; 11112 } 11113 if (!TopMostAttr) 11114 return FD; 11115 return cast<FunctionDecl>( 11116 cast<DeclRefExpr>(TopMostAttr->getVariantFuncRef()->IgnoreParenImpCasts()) 11117 ->getDecl()); 11118 } 11119 11120 bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { 11121 const auto *D = cast<FunctionDecl>(GD.getDecl()); 11122 // If the original function is defined already, use its definition. 
11123 StringRef MangledName = CGM.getMangledName(GD); 11124 llvm::GlobalValue *Orig = CGM.GetGlobalValue(MangledName); 11125 if (Orig && !Orig->isDeclaration()) 11126 return false; 11127 const FunctionDecl *NewFD = getDeclareVariantFunction(CGM.getContext(), D); 11128 // Emit original function if it does not have declare variant attribute or the 11129 // context does not match. 11130 if (NewFD == D) 11131 return false; 11132 GlobalDecl NewGD = GD.getWithDecl(NewFD); 11133 if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { 11134 DeferredVariantFunction.erase(D); 11135 return true; 11136 } 11137 DeferredVariantFunction.insert(std::make_pair(D, std::make_pair(NewGD, GD))); 11138 return true; 11139 } 11140 11141 llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( 11142 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11143 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11144 llvm_unreachable("Not supported in SIMD-only mode"); 11145 } 11146 11147 llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( 11148 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11149 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { 11150 llvm_unreachable("Not supported in SIMD-only mode"); 11151 } 11152 11153 llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction( 11154 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, 11155 const VarDecl *PartIDVar, const VarDecl *TaskTVar, 11156 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, 11157 bool Tied, unsigned &NumberOfParts) { 11158 llvm_unreachable("Not supported in SIMD-only mode"); 11159 } 11160 11161 void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, 11162 SourceLocation Loc, 11163 llvm::Function *OutlinedFn, 11164 ArrayRef<llvm::Value *> CapturedVars, 11165 const Expr *IfCond) { 11166 llvm_unreachable("Not supported in SIMD-only mode"); 11167 } 11168 11169 void 
CGOpenMPSIMDRuntime::emitCriticalRegion( 11170 CodeGenFunction &CGF, StringRef CriticalName, 11171 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, 11172 const Expr *Hint) { 11173 llvm_unreachable("Not supported in SIMD-only mode"); 11174 } 11175 11176 void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, 11177 const RegionCodeGenTy &MasterOpGen, 11178 SourceLocation Loc) { 11179 llvm_unreachable("Not supported in SIMD-only mode"); 11180 } 11181 11182 void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, 11183 SourceLocation Loc) { 11184 llvm_unreachable("Not supported in SIMD-only mode"); 11185 } 11186 11187 void CGOpenMPSIMDRuntime::emitTaskgroupRegion( 11188 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, 11189 SourceLocation Loc) { 11190 llvm_unreachable("Not supported in SIMD-only mode"); 11191 } 11192 11193 void CGOpenMPSIMDRuntime::emitSingleRegion( 11194 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, 11195 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars, 11196 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, 11197 ArrayRef<const Expr *> AssignmentOps) { 11198 llvm_unreachable("Not supported in SIMD-only mode"); 11199 } 11200 11201 void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF, 11202 const RegionCodeGenTy &OrderedOpGen, 11203 SourceLocation Loc, 11204 bool IsThreads) { 11205 llvm_unreachable("Not supported in SIMD-only mode"); 11206 } 11207 11208 void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF, 11209 SourceLocation Loc, 11210 OpenMPDirectiveKind Kind, 11211 bool EmitChecks, 11212 bool ForceSimpleCall) { 11213 llvm_unreachable("Not supported in SIMD-only mode"); 11214 } 11215 11216 void CGOpenMPSIMDRuntime::emitForDispatchInit( 11217 CodeGenFunction &CGF, SourceLocation Loc, 11218 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, 11219 bool Ordered, const DispatchRTInput &DispatchValues) { 11220 llvm_unreachable("Not 
supported in SIMD-only mode"); 11221 } 11222 11223 void CGOpenMPSIMDRuntime::emitForStaticInit( 11224 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, 11225 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { 11226 llvm_unreachable("Not supported in SIMD-only mode"); 11227 } 11228 11229 void CGOpenMPSIMDRuntime::emitDistributeStaticInit( 11230 CodeGenFunction &CGF, SourceLocation Loc, 11231 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) { 11232 llvm_unreachable("Not supported in SIMD-only mode"); 11233 } 11234 11235 void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, 11236 SourceLocation Loc, 11237 unsigned IVSize, 11238 bool IVSigned) { 11239 llvm_unreachable("Not supported in SIMD-only mode"); 11240 } 11241 11242 void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF, 11243 SourceLocation Loc, 11244 OpenMPDirectiveKind DKind) { 11245 llvm_unreachable("Not supported in SIMD-only mode"); 11246 } 11247 11248 llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF, 11249 SourceLocation Loc, 11250 unsigned IVSize, bool IVSigned, 11251 Address IL, Address LB, 11252 Address UB, Address ST) { 11253 llvm_unreachable("Not supported in SIMD-only mode"); 11254 } 11255 11256 void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF, 11257 llvm::Value *NumThreads, 11258 SourceLocation Loc) { 11259 llvm_unreachable("Not supported in SIMD-only mode"); 11260 } 11261 11262 void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF, 11263 OpenMPProcBindClauseKind ProcBind, 11264 SourceLocation Loc) { 11265 llvm_unreachable("Not supported in SIMD-only mode"); 11266 } 11267 11268 Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, 11269 const VarDecl *VD, 11270 Address VDAddr, 11271 SourceLocation Loc) { 11272 llvm_unreachable("Not supported in SIMD-only mode"); 11273 } 11274 11275 llvm::Function 
*CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition( 11276 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, 11277 CodeGenFunction *CGF) { 11278 llvm_unreachable("Not supported in SIMD-only mode"); 11279 } 11280 11281 Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate( 11282 CodeGenFunction &CGF, QualType VarType, StringRef Name) { 11283 llvm_unreachable("Not supported in SIMD-only mode"); 11284 } 11285 11286 void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF, 11287 ArrayRef<const Expr *> Vars, 11288 SourceLocation Loc) { 11289 llvm_unreachable("Not supported in SIMD-only mode"); 11290 } 11291 11292 void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, 11293 const OMPExecutableDirective &D, 11294 llvm::Function *TaskFunction, 11295 QualType SharedsTy, Address Shareds, 11296 const Expr *IfCond, 11297 const OMPTaskDataTy &Data) { 11298 llvm_unreachable("Not supported in SIMD-only mode"); 11299 } 11300 11301 void CGOpenMPSIMDRuntime::emitTaskLoopCall( 11302 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, 11303 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, 11304 const Expr *IfCond, const OMPTaskDataTy &Data) { 11305 llvm_unreachable("Not supported in SIMD-only mode"); 11306 } 11307 11308 void CGOpenMPSIMDRuntime::emitReduction( 11309 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, 11310 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, 11311 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) { 11312 assert(Options.SimpleReduction && "Only simple reduction is expected."); 11313 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs, 11314 ReductionOps, Options); 11315 } 11316 11317 llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit( 11318 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, 11319 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { 11320 
llvm_unreachable("Not supported in SIMD-only mode"); 11321 } 11322 11323 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, 11324 SourceLocation Loc, 11325 ReductionCodeGen &RCG, 11326 unsigned N) { 11327 llvm_unreachable("Not supported in SIMD-only mode"); 11328 } 11329 11330 Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, 11331 SourceLocation Loc, 11332 llvm::Value *ReductionsPtr, 11333 LValue SharedLVal) { 11334 llvm_unreachable("Not supported in SIMD-only mode"); 11335 } 11336 11337 void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, 11338 SourceLocation Loc) { 11339 llvm_unreachable("Not supported in SIMD-only mode"); 11340 } 11341 11342 void CGOpenMPSIMDRuntime::emitCancellationPointCall( 11343 CodeGenFunction &CGF, SourceLocation Loc, 11344 OpenMPDirectiveKind CancelRegion) { 11345 llvm_unreachable("Not supported in SIMD-only mode"); 11346 } 11347 11348 void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF, 11349 SourceLocation Loc, const Expr *IfCond, 11350 OpenMPDirectiveKind CancelRegion) { 11351 llvm_unreachable("Not supported in SIMD-only mode"); 11352 } 11353 11354 void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction( 11355 const OMPExecutableDirective &D, StringRef ParentName, 11356 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, 11357 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { 11358 llvm_unreachable("Not supported in SIMD-only mode"); 11359 } 11360 11361 void CGOpenMPSIMDRuntime::emitTargetCall( 11362 CodeGenFunction &CGF, const OMPExecutableDirective &D, 11363 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, 11364 const Expr *Device, 11365 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, 11366 const OMPLoopDirective &D)> 11367 SizeEmitter) { 11368 llvm_unreachable("Not supported in SIMD-only mode"); 11369 } 11370 11371 bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) { 11372 llvm_unreachable("Not supported in 
SIMD-only mode"); 11373 } 11374 11375 bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) { 11376 llvm_unreachable("Not supported in SIMD-only mode"); 11377 } 11378 11379 bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) { 11380 return false; 11381 } 11382 11383 void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF, 11384 const OMPExecutableDirective &D, 11385 SourceLocation Loc, 11386 llvm::Function *OutlinedFn, 11387 ArrayRef<llvm::Value *> CapturedVars) { 11388 llvm_unreachable("Not supported in SIMD-only mode"); 11389 } 11390 11391 void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, 11392 const Expr *NumTeams, 11393 const Expr *ThreadLimit, 11394 SourceLocation Loc) { 11395 llvm_unreachable("Not supported in SIMD-only mode"); 11396 } 11397 11398 void CGOpenMPSIMDRuntime::emitTargetDataCalls( 11399 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11400 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { 11401 llvm_unreachable("Not supported in SIMD-only mode"); 11402 } 11403 11404 void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall( 11405 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, 11406 const Expr *Device) { 11407 llvm_unreachable("Not supported in SIMD-only mode"); 11408 } 11409 11410 void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF, 11411 const OMPLoopDirective &D, 11412 ArrayRef<Expr *> NumIterations) { 11413 llvm_unreachable("Not supported in SIMD-only mode"); 11414 } 11415 11416 void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, 11417 const OMPDependClause *C) { 11418 llvm_unreachable("Not supported in SIMD-only mode"); 11419 } 11420 11421 const VarDecl * 11422 CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, 11423 const VarDecl *NativeParam) const { 11424 llvm_unreachable("Not supported in SIMD-only mode"); 11425 } 11426 11427 Address 11428 
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF, 11429 const VarDecl *NativeParam, 11430 const VarDecl *TargetParam) const { 11431 llvm_unreachable("Not supported in SIMD-only mode"); 11432 } 11433